Open-Meteo API data Analysis (Nashville, TN)¶
In [106]:
# Notes :
# Location to be analyzed, located at similar Latitude and geographics, but has very different weather (rain precipitation):
# a. Las Vegas (dry), coord: 36.1716° N, 115.1391° W
# b. Nashville (wet), coord: 36.1627° N, 86.7816° W
###############################################################################################
import openmeteo_requests
import requests_cache
from retry_requests import retry
import pandas as pd
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
"latitude": 36.1659,
"longitude": -86.7844,
"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation_probability", "precipitation", "rain", "showers", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "visibility", "wind_speed_10m", "soil_moisture_0_to_1cm", "soil_moisture_1_to_3cm", "soil_moisture_3_to_9cm", "soil_moisture_9_to_27cm", "soil_moisture_27_to_81cm", "evapotranspiration", "snowfall", "snow_depth", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_80m", "wind_speed_120m", "wind_speed_180m", "wind_direction_10m", "wind_direction_80m", "wind_direction_120m", "wind_direction_180m", "temperature_80m", "temperature_120m", "temperature_180m", "soil_temperature_0cm", "soil_temperature_6cm", "soil_temperature_18cm", "soil_temperature_54cm", "wind_gusts_10m"],
"past_days": 92,
"forecast_days": 1
}
responses = openmeteo.weather_api(url, params=params)
# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation_probability = hourly.Variables(4).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(5).ValuesAsNumpy()
hourly_rain = hourly.Variables(6).ValuesAsNumpy()
hourly_showers = hourly.Variables(7).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(8).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(9).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(11).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(12).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(13).ValuesAsNumpy()
hourly_visibility = hourly.Variables(14).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(15).ValuesAsNumpy()
hourly_soil_moisture_0_to_1cm = hourly.Variables(16).ValuesAsNumpy()
hourly_soil_moisture_1_to_3cm = hourly.Variables(17).ValuesAsNumpy()
hourly_soil_moisture_3_to_9cm = hourly.Variables(18).ValuesAsNumpy()
hourly_soil_moisture_9_to_27cm = hourly.Variables(19).ValuesAsNumpy()
hourly_soil_moisture_27_to_81cm = hourly.Variables(20).ValuesAsNumpy()
hourly_evapotranspiration = hourly.Variables(21).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(22).ValuesAsNumpy()
hourly_snow_depth = hourly.Variables(23).ValuesAsNumpy()
hourly_et0_fao_evapotranspiration = hourly.Variables(24).ValuesAsNumpy()
hourly_vapour_pressure_deficit = hourly.Variables(25).ValuesAsNumpy()
hourly_wind_speed_80m = hourly.Variables(26).ValuesAsNumpy()
hourly_wind_speed_120m = hourly.Variables(27).ValuesAsNumpy()
hourly_wind_speed_180m = hourly.Variables(28).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(29).ValuesAsNumpy()
hourly_wind_direction_80m = hourly.Variables(30).ValuesAsNumpy()
hourly_wind_direction_120m = hourly.Variables(31).ValuesAsNumpy()
hourly_wind_direction_180m = hourly.Variables(32).ValuesAsNumpy()
hourly_temperature_80m = hourly.Variables(33).ValuesAsNumpy()
hourly_temperature_120m = hourly.Variables(34).ValuesAsNumpy()
hourly_temperature_180m = hourly.Variables(35).ValuesAsNumpy()
hourly_soil_temperature_0cm = hourly.Variables(36).ValuesAsNumpy()
hourly_soil_temperature_6cm = hourly.Variables(37).ValuesAsNumpy()
hourly_soil_temperature_18cm = hourly.Variables(38).ValuesAsNumpy()
hourly_soil_temperature_54cm = hourly.Variables(39).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(40).ValuesAsNumpy()
hourly_data = {"date": pd.date_range(
start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
freq = pd.Timedelta(seconds = hourly.Interval()),
inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["apparent_temperature"] = hourly_apparent_temperature
hourly_data["precipitation_probability"] = hourly_precipitation_probability
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["showers"] = hourly_showers
hourly_data["pressure_msl"] = hourly_pressure_msl
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["visibility"] = hourly_visibility
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["soil_moisture_0_to_1cm"] = hourly_soil_moisture_0_to_1cm
hourly_data["soil_moisture_1_to_3cm"] = hourly_soil_moisture_1_to_3cm
hourly_data["soil_moisture_3_to_9cm"] = hourly_soil_moisture_3_to_9cm
hourly_data["soil_moisture_9_to_27cm"] = hourly_soil_moisture_9_to_27cm
hourly_data["soil_moisture_27_to_81cm"] = hourly_soil_moisture_27_to_81cm
hourly_data["evapotranspiration"] = hourly_evapotranspiration
hourly_data["snowfall"] = hourly_snowfall
hourly_data["snow_depth"] = hourly_snow_depth
hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
hourly_data["wind_speed_80m"] = hourly_wind_speed_80m
hourly_data["wind_speed_120m"] = hourly_wind_speed_120m
hourly_data["wind_speed_180m"] = hourly_wind_speed_180m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_direction_80m"] = hourly_wind_direction_80m
hourly_data["wind_direction_120m"] = hourly_wind_direction_120m
hourly_data["wind_direction_180m"] = hourly_wind_direction_180m
hourly_data["temperature_80m"] = hourly_temperature_80m
hourly_data["temperature_120m"] = hourly_temperature_120m
hourly_data["temperature_180m"] = hourly_temperature_180m
hourly_data["soil_temperature_0cm"] = hourly_soil_temperature_0cm
hourly_data["soil_temperature_6cm"] = hourly_soil_temperature_6cm
hourly_data["soil_temperature_18cm"] = hourly_soil_temperature_18cm
hourly_data["soil_temperature_54cm"] = hourly_soil_temperature_54cm
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)
Coordinates 36.16115188598633°N -86.79808807373047°E
Elevation 158.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
date temperature_2m relative_humidity_2m \
0 2025-03-01 00:00:00+00:00 NaN NaN
1 2025-03-01 01:00:00+00:00 NaN NaN
2 2025-03-01 02:00:00+00:00 NaN NaN
3 2025-03-01 03:00:00+00:00 NaN NaN
4 2025-03-01 04:00:00+00:00 NaN NaN
... ... ... ...
2227 2025-06-01 19:00:00+00:00 27.602501 45.0
2228 2025-06-01 20:00:00+00:00 28.002501 45.0
2229 2025-06-01 21:00:00+00:00 28.152500 44.0
2230 2025-06-01 22:00:00+00:00 27.902500 45.0
2231 2025-06-01 23:00:00+00:00 27.252501 47.0
dew_point_2m apparent_temperature precipitation_probability \
0 NaN NaN 0.0
1 NaN NaN 0.0
2 NaN NaN 0.0
3 NaN NaN 0.0
4 NaN NaN 0.0
... ... ... ...
2227 14.604142 28.444843 8.0
2228 14.966615 28.638031 6.0
2229 14.753403 28.290123 3.0
2230 14.876000 27.838562 4.0
2231 14.960908 27.168846 9.0
precipitation rain showers pressure_msl surface_pressure \
0 NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN
... ... ... ... ... ...
2227 0.0 0.0 0.0 1012.200012 994.226318
2228 0.0 0.0 0.0 1011.900024 993.955139
2229 0.0 0.0 0.0 1011.700012 993.767639
2230 0.0 0.0 0.0 1011.700012 993.752747
2231 0.0 0.0 0.0 1011.500000 993.517761
cloud_cover cloud_cover_low cloud_cover_mid cloud_cover_high \
0 NaN NaN NaN NaN
1 NaN NaN NaN NaN
2 NaN NaN NaN NaN
3 NaN NaN NaN NaN
4 NaN NaN NaN NaN
... ... ... ... ...
2227 24.0 24.0 0.0 0.0
2228 24.0 24.0 0.0 0.0
2229 23.0 23.0 0.0 0.0
2230 28.0 28.0 0.0 0.0
2231 4.0 4.0 0.0 0.0
visibility wind_speed_10m soil_moisture_0_to_1cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 42600.0 12.849528 0.275
2228 43400.0 12.574260 0.274
2229 44200.0 11.659777 0.272
2230 42900.0 11.525623 0.271
2231 40600.0 11.885453 0.271
soil_moisture_1_to_3cm soil_moisture_3_to_9cm soil_moisture_9_to_27cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 0.280 0.288 0.304
2228 0.278 0.287 0.303
2229 0.276 0.285 0.303
2230 0.275 0.284 0.302
2231 0.275 0.283 0.302
soil_moisture_27_to_81cm evapotranspiration snowfall snow_depth \
0 NaN 0.0 NaN NaN
1 NaN 0.0 NaN NaN
2 NaN 0.0 NaN NaN
3 NaN 0.0 NaN NaN
4 NaN 0.0 NaN NaN
... ... ... ... ...
2227 0.332 0.0 0.0 0.0
2228 0.332 0.0 0.0 0.0
2229 0.332 0.0 0.0 0.0
2230 0.332 0.0 0.0 0.0
2231 0.332 0.0 0.0 0.0
et0_fao_evapotranspiration vapour_pressure_deficit wind_speed_80m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 0.576654 2.030955 15.745627
2228 0.537299 2.078797 15.111424
2229 0.473631 2.135164 14.264361
2230 0.396486 2.066745 14.400000
2231 0.279190 1.917439 15.480000
wind_speed_120m wind_speed_180m wind_direction_10m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 16.506481 14.830076 258.690094
2228 18.276533 12.849528 256.759460
2229 22.435301 11.384199 261.119415
2230 21.315056 11.269782 271.789856
2231 21.085789 14.773164 271.735657
wind_direction_80m wind_direction_120m wind_direction_180m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 259.460876 286.821503 275.572113
2228 257.619263 300.173431 281.309906
2229 259.824554 301.607452 288.435028
2230 270.000000 305.882233 296.564972
2231 270.000000 318.532379 313.025085
temperature_80m temperature_120m temperature_180m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 25.410500 25.210501 23.867500
2228 25.760500 25.510500 24.267500
2229 25.260500 25.010500 24.767500
2230 24.310501 24.110500 25.167501
2231 23.510500 23.310501 24.867500
soil_temperature_0cm soil_temperature_6cm soil_temperature_18cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 30.217501 26.917501 21.917501
2228 31.567501 27.067501 22.317501
2229 29.367500 27.167501 22.717501
2230 28.267500 26.867500 23.017500
2231 26.867500 26.467501 23.267500
soil_temperature_54cm wind_gusts_10m
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
... ... ...
2227 19.917501 16.559999
2228 19.917501 16.199999
2229 19.917501 14.759999
2230 19.967501 15.480000
2231 19.967501 16.559999
[2232 rows x 42 columns]
In [ ]:
In [3]:
# Store the data into the CSV file
path1 = r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\openmeteo_data_nashville_tn.csv'
hourly_dataframe.to_csv(path1, header='column_names')
Geolocation of Las Vegas, NV and Nashville, TN¶
In [1]:
# Visualize geolocation by Latitude and Longitude of Las Vegas and Nashville
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20,10)
# Load a world map shapefile (built-in dataset)
world = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_admin_0_countries.shp')
worldpop = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_populated_places.shp')
# a. Las Vegas (dry), coord: 36.1716° N, 115.1391° W
# b. Nashville (wet), coord: 36.1627° N, 86.7816° W
data = {'longitude': [-115.1391, -86.7816],
'latitude': [36.1716, 36.1627],
'value': [10, 20]}
df = pd.DataFrame(data)
# Create geometry column
geometry = gpd.points_from_xy(df['longitude'], df['latitude'], crs="EPSG:4326")
# Create GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry=geometry)
print(gdf)
usapop = worldpop[worldpop['ADM0NAME'] == 'United States of America']
# # Plotting multiple layers
fig, ax = plt.subplots()
plt.title('Geolocation of Las Vegas and Nashville')
world.plot(ax = ax, cmap = 'hsv', edgecolor = 'black', column = 'SOVEREIGNT')
gdf.plot(ax = ax, color = 'yellow', markersize = 40) # The location of Las Vegas and Nashville can be seen as yellow dots in the map
longitude latitude value geometry 0 -115.1391 36.1716 10 POINT (-115.1391 36.1716) 1 -86.7816 36.1627 20 POINT (-86.7816 36.1627)
Out[1]:
<Axes: title={'center': 'Geolocation of Las Vegas and Nashville'}>
In [2]:
# Visualize Las Vegas (and Nevada) and Nashville (and Tennessee)
# States are depicted with blue color
# Cities are depicted with yellow color
stateprov = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_admin_1_states_provinces.shp')
usamain = stateprov[stateprov["name"] != "Alaska"]
usamain = usamain[usamain["name"] != "Hawaii"]
nevada = stateprov[stateprov["name"] == "Nevada"]
tennessee = stateprov[stateprov["name"] == "Tennessee"]
# Add Column 'coords' for labelling
nevada['coords'] = nevada['geometry'].apply(lambda x: x.representative_point().coords[:])
nevada['coords'] = [coords[0] for coords in nevada['coords']]
tennessee['coords'] = tennessee['geometry'].apply(lambda x: x.representative_point().coords[:])
tennessee['coords'] = [coords[0] for coords in tennessee['coords']]
# # Plotting multiple layers
fig, ax = plt.subplots()
for idx, row in nevada.iterrows():
plt.annotate(text=row['name'], xy=row['coords'],
horizontalalignment='center')
for idx, row in tennessee.iterrows():
plt.annotate(text=row['name'], xy=row['coords'],
horizontalalignment='center')
usamain.plot(ax = ax, color='green', edgecolor = 'black')
nevada.plot(ax = ax, color = 'blue', markersize = 40)
tennessee.plot(ax = ax, color = 'blue', markersize = 40)
gdf.plot(ax = ax, color = 'yellow', markersize = 40)
C:\ProgramData\anaconda3\Lib\site-packages\geopandas\geodataframe.py:1981: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value) C:\ProgramData\anaconda3\Lib\site-packages\geopandas\geodataframe.py:1981: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
Out[2]:
<Axes: >
In [124]:
# Maps of Nashville
import pandas as pd
import geopandas as gpd
gdf = gpd.read_file(r"C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\cities.geojson")
nashvilledf = gdf.loc[gdf['NAME'].isin(['NASHVILLE'])]
# Remove all unwanted Nashvilles, just leave the one with Index 23012, Nashville in Tennessee state.
# We use drop Method coz this Method doesn't change the GeoDataframe into Series.
# If we use .iloc or .loc Method, it will change the GeoDataframe into Series.
nashvilledf = nashvilledf.drop(20793)
nashvilledf = nashvilledf.drop(22744)
nashvilledf = nashvilledf.drop(23065)
nashvilledf = nashvilledf.drop(23759)
nashvilledf.explore()
Out[124]:
Make this Notebook Trusted to load map: File -> Trust Notebook
Open-Meteo Data Analysis¶
In [4]:
# Read from the CSV file
import pandas as pd
import matplotlib.pyplot as plt
path1 = r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\openmeteo_data_nashville_tn.csv'
df1 = pd.read_csv(path1)
df1
Out[4]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | ... | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2025-02-23 00:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 1 | 2025-02-23 01:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 2 | 2025-02-23 02:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 3 | 2025-02-23 03:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 4 | 2025-02-23 04:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 2227 | 2025-05-26 19:00:00+00:00 | 21.5025 | 76.0 | 17.093630 | 23.275127 | 3.0 | 0.0 | 0.0 | 0.0 | ... | 88.898320 | 63.904633 | 21.160500 | 20.9105 | 19.767500 | 25.567501 | 23.167501 | 19.467500 | 19.3675 | 9.360000 |
| 2228 | 2228 | 2025-05-26 20:00:00+00:00 | 21.9025 | 77.0 | 17.687681 | 23.821278 | 2.0 | 0.0 | 0.0 | 0.0 | ... | 83.774260 | 67.067870 | 20.560501 | 20.3605 | 20.667501 | 25.567501 | 23.267500 | 19.817501 | 19.3675 | 8.640000 |
| 2229 | 2229 | 2025-05-26 21:00:00+00:00 | 22.0025 | 76.0 | 17.577087 | 23.358010 | 3.0 | 0.0 | 0.0 | 0.0 | ... | 85.236440 | 65.725570 | 20.210500 | 20.0105 | 20.867500 | 24.367500 | 23.017500 | 20.117500 | 19.3675 | 12.599999 |
| 2230 | 2230 | 2025-05-26 22:00:00+00:00 | 21.5525 | 75.0 | 16.932825 | 22.161495 | 6.0 | 0.0 | 0.0 | 0.0 | ... | 99.039406 | 68.629310 | 19.360500 | 19.1605 | 21.167501 | 24.967500 | 22.967500 | 20.367500 | 19.3675 | 19.800000 |
| 2231 | 2231 | 2025-05-26 23:00:00+00:00 | 20.8025 | 75.0 | 16.208752 | 21.156380 | 10.0 | 0.0 | 0.0 | 0.0 | ... | 81.416466 | 70.497470 | 17.710500 | 17.6105 | 20.967500 | 22.217500 | 22.667501 | 20.567501 | 19.3675 | 21.240000 |
2232 rows × 43 columns
In [5]:
# Remove Duplicate data from raw table
df1 = df1.drop_duplicates()
# Drop/remove all entries with blanks
# df.dropna(subset='temperature_2m', inplace=True)
# df.dropna(subset='relative_humidity_2m', inplace=True)
df1.dropna(inplace=True)
In [6]:
# Set to display all columns
pd.set_option('display.max_columns',None)
plt.rcParams['figure.figsize'] = (12, 8)
# Check whether there's still any blank rows/columns
df1[df1.isna().any(axis=1)]
Out[6]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m |
|---|
In [7]:
# Add Column 'Location'
df1['Location'] = 'Nashville'
# Split the Date into Year, Month and Date
df1[['ymd','hmstz']] = df1['date'].str.split(' ', expand=True)
df1[['year','month','day']] = df1['ymd'].str.split('-', expand=True)
df1[['hour','minute','sectz','tzsec']] = df1['hmstz'].str.split(':', expand=True)
In [8]:
# Find out the correlation among Columns to filter what Columns we'll use for analysis
# For first glance, we filter out by common logic, what Columns are possible to have strong enough correlations.
# We choose a bunch of Columns specified with Index numbers or to be more humanly readable, the Columns name.
df1h = df1[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m',
'dew_point_2m','vapour_pressure_deficit','temperature_2m','cloud_cover_high','cloud_cover_low','wind_speed_10m',
'soil_moisture_3_to_9cm','temperature_80m','soil_temperature_0cm']]
df1h
Out[8]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 581 | 0.0 | 0.0 | 0.233 | 0.240 | 53.0 | 7.163813 | 0.900061 | 16.7995 | 100.0 | 0.0 | 19.296133 | 0.257 | 17.460500 | 13.617499 |
| 582 | 0.0 | 0.0 | 0.232 | 0.240 | 53.0 | 7.071089 | 0.894375 | 16.6995 | 100.0 | 0.0 | 18.792551 | 0.257 | 17.510500 | 14.917500 |
| 583 | 0.0 | 0.0 | 0.232 | 0.240 | 57.0 | 7.343195 | 0.775166 | 15.8495 | 100.0 | 0.0 | 18.214718 | 0.256 | 16.460500 | 14.517500 |
| 584 | 0.0 | 0.0 | 0.232 | 0.240 | 59.0 | 7.379331 | 0.715844 | 15.3495 | 100.0 | 0.0 | 18.250260 | 0.256 | 15.910501 | 14.417500 |
| 585 | 0.0 | 0.0 | 0.232 | 0.240 | 61.0 | 7.208212 | 0.650970 | 14.6495 | 100.0 | 0.0 | 18.643690 | 0.256 | 15.160501 | 13.567500 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 3.0 | 33.0 | 0.265 | 0.269 | 76.0 | 17.093630 | 0.615556 | 21.5025 | 90.0 | 100.0 | 5.091168 | 0.275 | 21.160500 | 25.567501 |
| 2228 | 2.0 | 100.0 | 0.262 | 0.266 | 77.0 | 17.687681 | 0.604456 | 21.9025 | 100.0 | 98.0 | 5.860375 | 0.273 | 20.560501 | 25.567501 |
| 2229 | 3.0 | 100.0 | 0.261 | 0.264 | 76.0 | 17.577087 | 0.634595 | 22.0025 | 100.0 | 100.0 | 9.387651 | 0.271 | 20.210500 | 24.367500 |
| 2230 | 6.0 | 100.0 | 0.260 | 0.263 | 75.0 | 16.932825 | 0.643173 | 21.5525 | 100.0 | 74.0 | 12.599998 | 0.270 | 19.360500 | 24.967500 |
| 2231 | 10.0 | 100.0 | 0.259 | 0.262 | 75.0 | 16.208752 | 0.614321 | 20.8025 | 100.0 | 71.0 | 12.287555 | 0.268 | 17.710500 | 22.217500 |
1651 rows × 14 columns
In [9]:
# Create a Function to generate sorted, limited Correlation Table of a certain Column/parameter
def gencorr(param):
df1hcorr = df1h.corr(numeric_only=True).abs().sort_values(by=param, ascending=False)
df1hcorr = df1hcorr.where(df1hcorr[param] > 0.5)
df1hcorr = df1hcorr.dropna()
corresult = df1hcorr[param].round(2)
return corresult
In [10]:
# Generate Correlation Table for 'temperature_2m'
gencorr('temperature_2m') # Koreksi
Out[10]:
temperature_2m 1.00 temperature_80m 0.96 soil_temperature_0cm 0.92 vapour_pressure_deficit 0.65 dew_point_2m 0.63 Name: temperature_2m, dtype: float64
In [11]:
# Draw plot of the most correlated columns with 'temperature_2m'
# For the more than half part of the Plot, the fluctuation of the 3 parameters/columns is similar except on several points.
# This plot supports the Correlation among these three columns.
df1[['temperature_2m','dew_point_2m']].plot(subplots=[('temperature_2m','dew_point_2m')])
Out[11]:
array([<Axes: >], dtype=object)
In [12]:
# Draw plot of the most correlated column with 'temperature_2m'
# The 'temperature_2m' and 'vapour_pressure_deficit' fluctuations for the most part are similar except at several points.
df04 = df1.where(df1['month'] == '04').dropna()
df05 = df1.where(df1['month'] == '05').dropna()
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('temperature_2m vs vapour_pressure_deficit in April 2025')
ax1.plot(df04[['temperature_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['vapour_pressure_deficit']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('temperature_2m vs vapour_pressure_deficit in May 2025')
ax1.plot(df05[['temperature_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['vapour_pressure_deficit']], color='green', label='Data 2')
Out[12]:
[<matplotlib.lines.Line2D at 0x17f9d0b0c50>]
In [13]:
# Generate Correlation Table for 'relative_humidity_2m'
gencorr('relative_humidity_2m')
Out[13]:
relative_humidity_2m 1.00 vapour_pressure_deficit 0.86 soil_moisture_0_to_1cm 0.59 soil_moisture_1_to_3cm 0.57 dew_point_2m 0.56 soil_moisture_3_to_9cm 0.53 cloud_cover_low 0.52 Name: relative_humidity_2m, dtype: float64
In [14]:
# Draw plot of the most correlated column with 'relative_humidity_2m'
# The relative_humidity_2m values are the good reverse of the vapour_pressure_deficit for the most part.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs vapour_pressure_deficit in April 2025')
ax1.plot(df04[['relative_humidity_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['vapour_pressure_deficit']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs vapour_pressure_deficit in May 2025')
ax1.plot(df05[['relative_humidity_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['vapour_pressure_deficit']], color='green', label='Data 2')
Out[14]:
[<matplotlib.lines.Line2D at 0x17f9cfea180>]
In [ ]:
In [15]:
# Generate Correlation Table for 'dew_point_2m'
gencorr('dew_point_2m')
Out[15]:
dew_point_2m 1.00 temperature_80m 0.71 soil_temperature_0cm 0.63 temperature_2m 0.63 relative_humidity_2m 0.56 Name: dew_point_2m, dtype: float64
In [16]:
# Draw plot of the most correlated column with 'dew_point_2m'
# The 'dew_point_2m' and 'temperature_2m' fluctuations for more than half part are similar except at several points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['dew_point_2m','temperature_2m']].plot(title='dew_point_2m vs temperature_2m in April 2025')
df05[['dew_point_2m','temperature_2m']].plot(title='dew_point_2m vs temperature_2m in May 2025')
Out[16]:
<Axes: title={'center': 'dew_point_2m vs temperature_2m in May 2025'}>
In [17]:
# Generate Correlation Table for 'precipitation_probability'
gencorr('precipitation_probability')
Out[17]:
precipitation_probability 1.00 cloud_cover_mid 0.51 Name: precipitation_probability, dtype: float64
In [18]:
# Draw plot of the most correlated column with 'precipitation_probability'
# The 'precipitation_probability' and 'cloud_cover_mid' fluctuations for the half part are correlated.
import matplotlib.pyplot as plt
df04[['precipitation_probability','cloud_cover_mid']].plot(title='precipitation_probability vs cloud_cover_mid in April 2025')
df05[['precipitation_probability','cloud_cover_mid']].plot(title='precipitation_probability vs cloud_cover_mid in May 2025')
Out[18]:
<Axes: title={'center': 'precipitation_probability vs cloud_cover_mid in May 2025'}>
In [19]:
# Generate Correlation Table for 'cloud_cover_low'
gencorr('cloud_cover_low')
Out[19]:
cloud_cover_low 1.00 relative_humidity_2m 0.52 Name: cloud_cover_low, dtype: float64
In [20]:
# Draw plot of the most correlated column with 'cloud_cover_low'
# The 'cloud_cover_low' and 'cloud_cover' fluctuations for more than half part are similar except at several points.
import matplotlib.pyplot as plt
df04[['cloud_cover_low','cloud_cover']].plot(title='cloud_cover_low vs cloud_cover in April 2025')
df05[['cloud_cover_low','cloud_cover']].plot(title='cloud_cover_low vs cloud_cover in May 2025')
Out[20]:
<Axes: title={'center': 'cloud_cover_low vs cloud_cover in May 2025'}>
In [21]:
# Generate Correlation Table for 'cloud_cover_mid'
gencorr('cloud_cover_mid')
Out[21]:
cloud_cover_mid 1.00 cloud_cover_high 0.57 precipitation_probability 0.51 Name: cloud_cover_mid, dtype: float64
In [22]:
# Draw plot of the most correlated column with 'cloud_cover_mid'
# The 'cloud_cover_mid' and 'cloud_cover' fluctuations for more than half part are not so similar.
import matplotlib.pyplot as plt
df04[['cloud_cover_mid','cloud_cover']].plot(title='cloud_cover_mid vs cloud_cover in April 2025')
df05[['cloud_cover_mid','cloud_cover']].plot(title='cloud_cover_mid vs cloud_cover in May 2025')
Out[22]:
<Axes: title={'center': 'cloud_cover_mid vs cloud_cover in May 2025'}>
In [23]:
# Generate Correlation Table for 'cloud_cover_high'
gencorr('cloud_cover_high')
Out[23]:
cloud_cover_high 1.00 cloud_cover_mid 0.57 Name: cloud_cover_high, dtype: float64
In [24]:
# Draw plot of the most correlated column with 'cloud_cover_high'
# The 'cloud_cover_high' and 'cloud_cover_mid' fluctuations for more than half part are not so similar.
import matplotlib.pyplot as plt
df04[['cloud_cover_high','cloud_cover_mid']].plot(title='cloud_cover_high vs cloud_cover_mid in April 2025')
df05[['cloud_cover_high','cloud_cover_mid']].plot(title='cloud_cover_high vs cloud_cover_mid in May 2025')
Out[24]:
<Axes: title={'center': 'cloud_cover_high vs cloud_cover_mid in May 2025'}>
In [25]:
# Generate Correlation Table for 'wind_speed_10m'
gencorr('wind_speed_10m')
Out[25]:
wind_speed_10m 1.0 Name: wind_speed_10m, dtype: float64
In [26]:
# Draw plot of the most correlated column with 'wind_speed_10m'
# The 'wind_speed_10m' and 'wind_gusts_10m' fluctuations for the most part are similar except at several points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['wind_speed_10m','wind_gusts_10m']].plot(title='wind_speed_10m vs wind_gusts_10m in April 2025')
df05[['wind_speed_10m','wind_gusts_10m']].plot(title='wind_speed_10m vs wind_gusts_10m in May 2025')
Out[26]:
<Axes: title={'center': 'wind_speed_10m vs wind_gusts_10m in May 2025'}>
In [27]:
# Generate Correlation Table for 'soil_moisture_0_to_1cm'
gencorr('soil_moisture_0_to_1cm')
Out[27]:
soil_moisture_0_to_1cm 1.00 soil_moisture_1_to_3cm 0.99 soil_moisture_3_to_9cm 0.93 relative_humidity_2m 0.59 vapour_pressure_deficit 0.55 Name: soil_moisture_0_to_1cm, dtype: float64
In [28]:
# Draw plot of the most correlated column with 'soil_moisture_0_to_1cm'
# The 'soil_moisture_0_to_1cm' and 'soil_moisture_1_to_3cm' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_0_to_1cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in April 2025')
df05[['soil_moisture_0_to_1cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in May 2025')
Out[28]:
<Axes: title={'center': 'soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in May 2025'}>
In [29]:
# Generate Correlation Table for 'temperature_80m'
gencorr('temperature_80m')
Out[29]:
temperature_80m 1.00 temperature_2m 0.96 soil_temperature_0cm 0.86 dew_point_2m 0.71 vapour_pressure_deficit 0.52 Name: temperature_80m, dtype: float64
In [30]:
# Draw plot of the most correlated column with 'temperature_80m'
# The 'temperature_80m' and 'temperature_2m' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['temperature_80m','temperature_2m']].plot(title='temperature_80m vs temperature_2m in April 2025')
df05[['temperature_80m','temperature_2m']].plot(title='temperature_80m vs temperature_2m in May 2025')
Out[30]:
<Axes: title={'center': 'temperature_80m vs temperature_2m in May 2025'}>
In [31]:
# Generate Correlation Table for 'soil_temperature_0cm'
gencorr('soil_temperature_0cm')
Out[31]:
soil_temperature_0cm 1.00 temperature_2m 0.92 temperature_80m 0.86 dew_point_2m 0.63 vapour_pressure_deficit 0.54 Name: soil_temperature_0cm, dtype: float64
In [32]:
# Draw plot of the most correlated column with 'soil_temperature_0cm'
# The 'soil_temperature_0cm' and 'temperature_2m' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_temperature_0cm','temperature_2m']].plot(title='soil_temperature_0cm vs temperature_2m in April 2025')
df05[['soil_temperature_0cm','temperature_2m']].plot(title='soil_temperature_0cm vs temperature_2m in May 2025')
Out[32]:
<Axes: title={'center': 'soil_temperature_0cm vs temperature_2m in May 2025'}>
In [33]:
# Generate Correlation Table for 'soil_moisture_1_to_3cm'
gencorr('soil_moisture_1_to_3cm')
Out[33]:
soil_moisture_1_to_3cm 1.00 soil_moisture_0_to_1cm 0.99 soil_moisture_3_to_9cm 0.96 relative_humidity_2m 0.57 vapour_pressure_deficit 0.52 Name: soil_moisture_1_to_3cm, dtype: float64
In [34]:
# Draw plot of the most correlated column with 'soil_moisture_1_to_3cm'
# The 'soil_moisture_1_to_3cm' and 'soil_moisture_0_to_1cm' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_1_to_3cm','soil_moisture_0_to_1cm']].plot(title='soil_moisture_1_to_3cm vs soil_moisture_0_to_1cm in April 2025')
df05[['soil_moisture_1_to_3cm','soil_moisture_0_to_1cm']].plot(title='soil_moisture_1_to_3cm vs soil_moisture_0_to_1cm in May 2025')
Out[34]:
<Axes: title={'center': 'soil_moisture_1_to_3cm vs soil_moisture_0_to_1cm in May 2025'}>
In [35]:
# Generate Correlation Table for 'soil_moisture_3_to_9cm'
gencorr('soil_moisture_3_to_9cm')
Out[35]:
soil_moisture_3_to_9cm 1.00 soil_moisture_1_to_3cm 0.96 soil_moisture_0_to_1cm 0.93 relative_humidity_2m 0.53 Name: soil_moisture_3_to_9cm, dtype: float64
In [36]:
# Draw plot of the most correlated column with 'soil_moisture_3_to_9cm'
# The 'soil_moisture_3_to_9cm' and 'soil_moisture_1_to_3cm' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_3_to_9cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_3_to_9cm vs soil_moisture_1_to_3cm in April 2025')
df05[['soil_moisture_3_to_9cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_3_to_9cm vs soil_moisture_1_to_3cm in May 2025')
Out[36]:
<Axes: title={'center': 'soil_moisture_3_to_9cm vs soil_moisture_1_to_3cm in May 2025'}>
In [37]:
# Generate Correlation Table for 'vapour_pressure_deficit'
gencorr('vapour_pressure_deficit')
Out[37]:
vapour_pressure_deficit 1.00 relative_humidity_2m 0.86 temperature_2m 0.65 soil_moisture_0_to_1cm 0.55 soil_temperature_0cm 0.54 temperature_80m 0.52 soil_moisture_1_to_3cm 0.52 Name: vapour_pressure_deficit, dtype: float64
In [38]:
# Draw plot of the most correlated column with 'vapour_pressure_deficit'
# The 'vapour_pressure_deficit' and 'soil_moisture_0_to_1cm' fluctuations for more than half part are similar.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['vapour_pressure_deficit','soil_moisture_0_to_1cm']].plot(title='vapour_pressure_deficit vs soil_moisture_0_to_1cm in April 2025')
df05[['vapour_pressure_deficit','soil_moisture_0_to_1cm']].plot(title='vapour_pressure_deficit vs soil_moisture_0_to_1cm in May 2025')
Out[38]:
<Axes: title={'center': 'vapour_pressure_deficit vs soil_moisture_0_to_1cm in May 2025'}>
In [39]:
# Draw plot of the most correlated column with 'vapour_pressure_deficit'
# The vapour_pressure_deficit values are the good reverse of the vapour_pressure_deficit for the most part.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('vapour_pressure_deficit vs soil_temperature_0cm in April 2025')
ax1.plot(df04[['vapour_pressure_deficit']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['soil_temperature_0cm']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('vapour_pressure_deficit vs soil_temperature_0cm in May 2025')
ax1.plot(df05[['vapour_pressure_deficit']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['soil_temperature_0cm']], color='green', label='Data 2')
Out[39]:
[<matplotlib.lines.Line2D at 0x17f9ce3ecc0>]
In [40]:
# Draw the Heatmap of Weather Data Correlation
import seaborn as sns
import matplotlib.pyplot as plt
sns.heatmap(df1h.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [41]:
# How about if we limit the 'temperature_2m' to certain range, let's say, more than 18C
# Data for 4 Columns where temperature_2m more than 18 C
df1h2 = df1h.where(df1h['temperature_2m'] > 18)
df1h2 = df1h2.dropna()
df1h2
Out[41]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 595 | 0.0 | 100.0 | 0.231 | 0.238 | 72.0 | 13.068562 | 0.585521 | 18.1995 | 100.0 | 100.0 | 26.072052 | 0.253 | 17.660500 | 23.367500 |
| 596 | 1.0 | 100.0 | 0.229 | 0.237 | 69.0 | 13.279072 | 0.685738 | 19.0995 | 100.0 | 100.0 | 26.987997 | 0.252 | 18.660500 | 21.267500 |
| 597 | 1.0 | 0.0 | 0.229 | 0.237 | 70.0 | 13.691494 | 0.671908 | 19.2995 | 5.0 | 100.0 | 28.241076 | 0.252 | 18.910500 | 21.917501 |
| 598 | 2.0 | 0.0 | 0.228 | 0.236 | 69.0 | 14.139482 | 0.725077 | 19.9995 | 0.0 | 97.0 | 28.467329 | 0.251 | 19.060501 | 21.867500 |
| 599 | 8.0 | 0.0 | 0.227 | 0.235 | 71.0 | 14.197729 | 0.661702 | 19.5995 | 0.0 | 56.0 | 28.241076 | 0.251 | 19.110500 | 21.217500 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 3.0 | 33.0 | 0.265 | 0.269 | 76.0 | 17.093630 | 0.615556 | 21.5025 | 90.0 | 100.0 | 5.091168 | 0.275 | 21.160500 | 25.567501 |
| 2228 | 2.0 | 100.0 | 0.262 | 0.266 | 77.0 | 17.687681 | 0.604456 | 21.9025 | 100.0 | 98.0 | 5.860375 | 0.273 | 20.560501 | 25.567501 |
| 2229 | 3.0 | 100.0 | 0.261 | 0.264 | 76.0 | 17.577087 | 0.634595 | 22.0025 | 100.0 | 100.0 | 9.387651 | 0.271 | 20.210500 | 24.367500 |
| 2230 | 6.0 | 100.0 | 0.260 | 0.263 | 75.0 | 16.932825 | 0.643173 | 21.5525 | 100.0 | 74.0 | 12.599998 | 0.270 | 19.360500 | 24.967500 |
| 2231 | 10.0 | 100.0 | 0.259 | 0.262 | 75.0 | 16.208752 | 0.614321 | 20.8025 | 100.0 | 71.0 | 12.287555 | 0.268 | 17.710500 | 22.217500 |
884 rows × 14 columns
In [ ]:
In [42]:
sns.heatmap(df1h2.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [43]:
######### Conclusion from the Heatmap ##############
# 'relative_humidity_2m' has correlation of -0.86 to 'vapour_pressure_deficit'.
# 'temperature_80m' has correlation of 0.92 to 'temperature_2m'.
# 'temperature_80m' has correlation of 0.86 to 'soil_temperature_0cm'.
# 'temperature_80m' has correlation of 0.71 to 'dew_point_2m'.
# 'temperature_2m' has correlation of 0.92 to 'soil_temperature_0cm'.
# 'dew_point_2m' has correlation of 0.81 to 'relative_humidity_2m' parameter if temperature_2m > 18C, compared to 0.56 with all data included.
# 'cloud_cover_mid' has correlation of 0.51 to 'precipitation_probability'.
# 'cloud_cover_low' has correlation of 0.52 to 'relative_humidity_2m'
# 'soil_moisture_0_to_1cm' has correlation of 0.59 to 'relative_humidity_2m'.
# 'soil_temperature_0cm has correlation of 0.63 to 'dew_point_2m'.
# 'vapour_pressure_deficit' has correlation of 0.65 to 'temperature_2m'.
In [44]:
# LAB : Check 'corr' method of Pandas with mathematical high order polynomial equations (quadratic to power-of-5 equations)
# Equations used:
# 1. y1 = 2x^2 + 5x + 6
# 2. y2 = 2x^3 + 7x^2 + 10x + 20
# 3. y3 = 5x^4 + 12x^3 + 8x^2 + 6x + 20
# 4. y4 = 3x^5 + 8x^4 + 10x^3 + 4x^2 + 9x + 17
# 5. y5 = 2x^7 + 9x^6 + 12x^5 + 31x^4 + 90x^3 + 6x^2 + 78x + 111
dfq = pd.read_csv(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\quadratic_equation.csv')
dfq
Out[44]:
| x | y1 | y2 | y3 | y4 | y5 | |
|---|---|---|---|---|---|---|
| 0 | 1 | 13 | 39 | 51 | 51 | 339 |
| 1 | 2 | 24 | 84 | 240 | 355 | 2723 |
| 2 | 3 | 39 | 167 | 839 | 1727 | 19191 |
| 3 | 4 | 58 | 300 | 2220 | 5877 | 96135 |
| 4 | 5 | 81 | 495 | 4875 | 15787 | 365651 |
| 5 | 6 | 108 | 764 | 9416 | 36071 | 1133499 |
| 6 | 7 | 139 | 1119 | 16575 | 73335 | 3013863 |
| 7 | 8 | 174 | 1572 | 27204 | 136537 | 7120991 |
| 8 | 9 | 213 | 2135 | 42275 | 237347 | 15327795 |
| 9 | 10 | 256 | 2820 | 62880 | 390507 | 30601491 |
| 10 | 11 | 303 | 3639 | 90231 | 614191 | 57426359 |
| 11 | 12 | 354 | 4604 | 125660 | 930365 | 102323703 |
| 12 | 13 | 409 | 5727 | 170619 | 1365147 | 174479091 |
| 13 | 14 | 468 | 7020 | 226680 | 1949167 | 286486955 |
| 14 | 15 | 531 | 8495 | 295535 | 2717927 | 455222631 |
| 15 | 16 | 598 | 10164 | 378996 | 3712161 | 702851919 |
| 16 | 17 | 669 | 12039 | 478995 | 4978195 | 1057988243 |
| 17 | 18 | 744 | 14132 | 597584 | 6568307 | 1557007491 |
In [45]:
sns.heatmap(dfq.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [46]:
# CONCLUSION:
# 'corr' Method of Pandas is accurate enough until cube equation, but for higher order equation, is much less.
# Correlation value of 0.8 is still can be regarded as correlated enough, due to corr method algorithm limitation.
In [47]:
# Trendline of Polynomial Equations of Dataset x and y1 from the previous Lab Dataset.
# Convert the data in Dataframe into a basic List
dfqdictx = dfq[['x']].to_dict(orient='records')
dfqdicty1 = dfq[['y1']].to_dict(orient='records')
x = []
y1 = []
for i in dfqdictx:
x.append(i['x'])
for i in dfqdicty1:
y1.append(i['y1'])
import matplotlib.pyplot as plt
import numpy as np
#create scatterplot
plt.scatter(x, y1)
#calculate equation for quadratic trendline
z = np.polyfit(x, y1, 2)
p = np.poly1d(z)
print(p)
#add trendline to plot
plt.plot(x, p(x))
2 2 x + 5 x + 6
Out[47]:
[<matplotlib.lines.Line2D at 0x17fa354f710>]
In [ ]:
In [48]:
# Conclusion: it's proven that the Trendline equation made by 'poly1d' Function is exactly the same as the original one.
In [49]:
# Trendline of Polynomial Equations of Dataset x and y2 from the previous Lab Dataset.
# Convert the data in Dataframe into a basic List
dfqdicty2 = dfq[['y2']].to_dict(orient='records')
y2 = []
for i in dfqdicty2:
y2.append(i['y2'])
import matplotlib.pyplot as plt
#create scatterplot
plt.scatter(x, y2)
#calculate equation for quadratic trendline
z = np.polyfit(x, y2, 3)
p = np.poly1d(z) # It's proven that poly1d is successfully create the polynomial equation for y2 dataset
print(p)
#add trendline to plot
plt.plot(x, p(x))
3 2 2 x + 7 x + 10 x + 20
Out[49]:
[<matplotlib.lines.Line2D at 0x17fa5ba7860>]
In [50]:
# Trendline of Polynomial Equations of Dataset x and y3 from the previous Lab Dataset.
# Convert the data in Dataframe into a basic List
dfqdicty3 = dfq[['y3']].to_dict(orient='records')
y3 = []
for i in dfqdicty3:
y3.append(i['y3'])
import matplotlib.pyplot as plt
#create scatterplot
plt.scatter(x, y3)
#calculate equation for quadratic trendline
z = np.polyfit(x, y3, 4)
p = np.poly1d(z) # It's proven that poly1d is successfully create the polynomial equation for y3 dataset
print(p)
#add trendline to plot
plt.plot(x, p(x))
4 3 2 5 x + 12 x + 8 x + 6 x + 20
Out[50]:
[<matplotlib.lines.Line2D at 0x17fa4d3be90>]
In [51]:
# Trendline of Polynomial Equations of Dataset x and y4 from the previous Lab Dataset.
# Convert the data in Dataframe into a basic List
dfqdicty4 = dfq[['y4']].to_dict(orient='records')
y4 = []
for i in dfqdicty4:
y4.append(i['y4'])
import matplotlib.pyplot as plt
#create scatterplot
plt.scatter(x, y4)
#calculate equation for quadratic trendline
z = np.polyfit(x, y4, 5)
p = np.poly1d(z) # It's proven that poly1d is successfully create the polynomial equation for y4 dataset
print(p)
#add trendline to plot
plt.plot(x, p(x))
5 4 3 2 3 x + 8 x + 10 x + 4 x + 9 x + 17
Out[51]:
[<matplotlib.lines.Line2D at 0x17fa4deb890>]
In [52]:
# Trendline of Polynomial Equations of Dataset x and y5 from the previous Lab Dataset.
# Convert the data in Dataframe into a basic List
dfqdicty5 = dfq[['y5']].to_dict(orient='records')
y5 = []
for i in dfqdicty5:
y5.append(i['y5'])
import matplotlib.pyplot as plt
#create scatterplot
plt.scatter(x, y5)
#calculate equation for quadratic trendline
z = np.polyfit(x, y5, 7)
p = np.poly1d(z) # It's proven that poly1d is successfully create the polynomial equation for y5 dataset
print(p)
#add trendline to plot
plt.plot(x, p(x))
7 6 5 4 3 2 2 x + 9 x + 12 x + 31 x + 90 x + 6 x + 78 x + 111
Out[52]:
[<matplotlib.lines.Line2D at 0x17fa5395220>]
In [53]:
# The plot of all data of all Columns/parameters
df1
Out[53]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 581 | 581 | 2025-03-19 05:00:00+00:00 | 16.7995 | 53.0 | 7.163813 | 13.256210 | 0.0 | 0.0 | 0.0 | 0.0 | 1012.3 | 993.66210 | 100.0 | 0.0 | 0.0 | 100.0 | 24140.0 | 19.296133 | 0.233 | 0.240 | 0.257 | 0.289 | 0.312 | 0.0 | 0.0 | 0.0 | 0.097479 | 0.900061 | 38.034130 | 41.253933 | 57.802147 | 194.036270 | 197.062940 | 197.628110 | 193.690060 | 17.460500 | 17.3605 | 16.867500 | 13.617499 | 14.517500 | 14.317500 | 12.117499 | 58.320000 | Nashville | 2025-03-19 | 05:00:00+00:00 | 2025 | 03 | 19 | 05 | 00 | 00+00 | 00 |
| 582 | 582 | 2025-03-19 06:00:00+00:00 | 16.6995 | 53.0 | 7.071089 | 13.207399 | 0.0 | 0.0 | 0.0 | 0.0 | 1012.2 | 993.55770 | 100.0 | 0.0 | 0.0 | 100.0 | 24140.0 | 18.792551 | 0.232 | 0.240 | 0.257 | 0.288 | 0.312 | 0.0 | 0.0 | 0.0 | 0.095071 | 0.894375 | 37.123665 | 39.995990 | 54.588394 | 196.699330 | 199.840790 | 200.432900 | 192.569780 | 17.510500 | 17.4105 | 17.367500 | 14.917500 | 15.267500 | 14.517500 | 12.117499 | 57.600000 | Nashville | 2025-03-19 | 06:00:00+00:00 | 2025 | 03 | 19 | 06 | 00 | 00+00 | 00 |
| 583 | 583 | 2025-03-19 07:00:00+00:00 | 15.8495 | 57.0 | 7.343195 | 12.507574 | 0.0 | 0.0 | 0.0 | 0.0 | 1012.3 | 993.60156 | 100.0 | 0.0 | 0.0 | 100.0 | 24140.0 | 18.214718 | 0.232 | 0.240 | 0.256 | 0.288 | 0.311 | 0.0 | 0.0 | 0.0 | 0.080870 | 0.775166 | 35.900864 | 39.098087 | 54.206000 | 198.435040 | 201.161330 | 201.501500 | 196.990900 | 16.460500 | 16.3605 | 16.367500 | 14.517500 | 14.867499 | 14.467500 | 12.167500 | 58.320000 | Nashville | 2025-03-19 | 07:00:00+00:00 | 2025 | 03 | 19 | 07 | 00 | 00+00 | 00 |
| 584 | 584 | 2025-03-19 08:00:00+00:00 | 15.3495 | 59.0 | 7.379331 | 12.011299 | 0.0 | 0.0 | 0.0 | 0.0 | 1011.8 | 993.07860 | 100.0 | 0.0 | 0.0 | 100.0 | 24140.0 | 18.250260 | 0.232 | 0.240 | 0.256 | 0.287 | 0.311 | 0.0 | 0.0 | 0.0 | 0.075079 | 0.715844 | 35.747110 | 38.583614 | 51.580600 | 194.858660 | 198.799990 | 199.470400 | 199.573210 | 15.910501 | 15.8105 | 15.367499 | 14.417500 | 14.667500 | 14.417500 | 12.167500 | 57.960000 | Nashville | 2025-03-19 | 08:00:00+00:00 | 2025 | 03 | 19 | 08 | 00 | 00+00 | 00 |
| 585 | 585 | 2025-03-19 09:00:00+00:00 | 14.6495 | 61.0 | 7.208212 | 11.212486 | 0.0 | 0.0 | 0.0 | 0.0 | 1011.4 | 992.64110 | 100.0 | 0.0 | 0.0 | 100.0 | 24140.0 | 18.643690 | 0.232 | 0.240 | 0.256 | 0.287 | 0.311 | 0.0 | 0.0 | 0.0 | 0.070025 | 0.650970 | 35.188046 | 38.145466 | 48.876540 | 190.007920 | 193.609760 | 193.366940 | 200.704300 | 15.160501 | 15.0605 | 14.567500 | 13.567500 | 14.367499 | 14.367499 | 12.217500 | 58.320000 | Nashville | 2025-03-19 | 09:00:00+00:00 | 2025 | 03 | 19 | 09 | 00 | 00+00 | 00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 2227 | 2025-05-26 19:00:00+00:00 | 21.5025 | 76.0 | 17.093630 | 23.275127 | 3.0 | 0.0 | 0.0 | 0.0 | 1018.7 | 1000.24040 | 100.0 | 100.0 | 33.0 | 90.0 | 21100.0 | 5.091168 | 0.265 | 0.269 | 0.275 | 0.274 | 0.296 | 0.0 | 0.0 | 0.0 | 0.311080 | 0.615556 | 6.519877 | 19.110743 | 19.642280 | 81.869990 | 83.659904 | 88.898320 | 63.904633 | 21.160500 | 20.9105 | 19.767500 | 25.567501 | 23.167501 | 19.467500 | 19.367500 | 9.360000 | Nashville | 2025-05-26 | 19:00:00+00:00 | 2025 | 05 | 26 | 19 | 00 | 00+00 | 00 |
| 2228 | 2228 | 2025-05-26 20:00:00+00:00 | 21.9025 | 77.0 | 17.687681 | 23.821278 | 2.0 | 0.0 | 0.0 | 0.0 | 1018.1 | 999.67584 | 100.0 | 98.0 | 100.0 | 100.0 | 20700.0 | 5.860375 | 0.262 | 0.266 | 0.273 | 0.274 | 0.296 | 0.0 | 0.0 | 0.0 | 0.215838 | 0.604456 | 6.877789 | 20.329449 | 20.326454 | 42.510403 | 47.121110 | 83.774260 | 67.067870 | 20.560501 | 20.3605 | 20.667501 | 25.567501 | 23.267500 | 19.817501 | 19.367500 | 8.640000 | Nashville | 2025-05-26 | 20:00:00+00:00 | 2025 | 05 | 26 | 20 | 00 | 00+00 | 00 |
| 2229 | 2229 | 2025-05-26 21:00:00+00:00 | 22.0025 | 76.0 | 17.577087 | 23.358010 | 3.0 | 0.0 | 0.0 | 0.0 | 1017.5 | 999.09290 | 100.0 | 100.0 | 100.0 | 100.0 | 21200.0 | 9.387651 | 0.261 | 0.264 | 0.271 | 0.273 | 0.295 | 0.0 | 0.0 | 0.0 | 0.242568 | 0.634595 | 12.181624 | 22.123201 | 20.140705 | 32.471172 | 34.159744 | 85.236440 | 65.725570 | 20.210500 | 20.0105 | 20.867500 | 24.367500 | 23.017500 | 20.117500 | 19.367500 | 12.599999 | Nashville | 2025-05-26 | 21:00:00+00:00 | 2025 | 05 | 26 | 21 | 00 | 00+00 | 00 |
| 2230 | 2230 | 2025-05-26 22:00:00+00:00 | 21.5525 | 75.0 | 16.932825 | 22.161495 | 6.0 | 0.0 | 0.0 | 0.0 | 1017.1 | 998.67255 | 100.0 | 74.0 | 100.0 | 100.0 | 21300.0 | 12.599998 | 0.260 | 0.263 | 0.270 | 0.273 | 0.295 | 0.0 | 0.0 | 0.0 | 0.169983 | 0.643173 | 18.792550 | 16.370962 | 17.782688 | 53.130020 | 53.569054 | 99.039406 | 68.629310 | 19.360500 | 19.1605 | 21.167501 | 24.967500 | 22.967500 | 20.367500 | 19.367500 | 19.800000 | Nashville | 2025-05-26 | 22:00:00+00:00 | 2025 | 05 | 26 | 22 | 00 | 00+00 | 00 |
| 2231 | 2231 | 2025-05-26 23:00:00+00:00 | 20.8025 | 75.0 | 16.208752 | 21.156380 | 10.0 | 0.0 | 0.0 | 0.0 | 1017.1 | 998.62580 | 100.0 | 71.0 | 100.0 | 100.0 | 21100.0 | 12.287555 | 0.259 | 0.262 | 0.268 | 0.273 | 0.295 | 0.0 | 0.0 | 0.0 | 0.159207 | 0.614321 | 19.513195 | 19.695263 | 18.331741 | 58.172600 | 60.124096 | 81.416466 | 70.497470 | 17.710500 | 17.6105 | 20.967500 | 22.217500 | 22.667501 | 20.567501 | 19.367500 | 21.240000 | Nashville | 2025-05-26 | 23:00:00+00:00 | 2025 | 05 | 26 | 23 | 00 | 00+00 | 00 |
1651 rows × 53 columns
In [54]:
# Try to find the other Columns value when the 'temperature_2m' is maximum
#temperature_2m_max = df[df1.columns[1]].max() # OR temperature_2m_max = df['temperature_2m'].max()
temperature_2m_max = df1['temperature_2m'].max()
print(temperature_2m_max)
df1ht2mmax = df1.where(df1['temperature_2m'] == temperature_2m_max)
df1ht2mmax = df1ht2mmax.dropna()
df1ht2mmax[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
30.8525
Out[54]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2013 | 2025-05-17 21:00:00+00:00 | 30.8525 | 28.0 | 10.237361 | 0.0 | 0.0 | 0.0 | 0.0 | 26.525429 | 0.284 | 0.291 | 0.302 | 3.207373 | 27.4605 | 31.5175 |
In [55]:
# Try to find the other Columns value when the 'temperature_2m' is minimum
temperature_2m_min = df1['temperature_2m'].min()
print(temperature_2m_min)
df1ht2mmin = df1.where(df1['temperature_2m'] == temperature_2m_min)
df1ht2mmin = df1ht2mmin.dropna()
df1ht2mmin[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
-2.7975001
Out[55]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 636 | 2025-03-21 12:00:00+00:00 | -2.7975 | 87.0 | -4.659359 | 0.0 | 10.0 | 0.0 | 0.0 | 1.018234 | 0.267 | 0.269 | 0.273 | 0.06488 | 2.7105 | -3.0825 |
In [56]:
# Find values when the 'precipitation_probability' is Maximum
precipitation_probability_max = df1['precipitation_probability'].max()
print(precipitation_probability_max)
df1hppmax = df1.where(df1['precipitation_probability'] == precipitation_probability_max).sort_values(by = ['cloud_cover_mid','soil_moisture_0_to_1cm'], ascending=[False,False])
df1hppmax = df1hppmax.dropna()
df1hppmax[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm',
'cloud_cover_low','cloud_cover_high','wind_speed_10m','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
100.0
Out[56]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1014 | 2025-04-06 06:00:00+00:00 | 14.902500 | 97.0 | 14.430263 | 100.0 | 100.0 | 0.417 | 100.0 | 100.0 | 12.229406 | 0.416 | 0.412 | 0.050880 | 15.310500 | 16.667501 |
| 1015 | 2025-04-06 07:00:00+00:00 | 15.752501 | 99.0 | 15.595463 | 100.0 | 100.0 | 0.406 | 99.0 | 100.0 | 7.968939 | 0.404 | 0.398 | 0.017908 | 15.510500 | 16.617500 |
| 1013 | 2025-04-06 05:00:00+00:00 | 15.302500 | 96.0 | 14.668031 | 100.0 | 100.0 | 0.396 | 100.0 | 100.0 | 9.957109 | 0.393 | 0.393 | 0.069604 | 14.760500 | 18.067501 |
| 1017 | 2025-04-06 09:00:00+00:00 | 16.502500 | 96.0 | 15.862131 | 100.0 | 100.0 | 0.390 | 99.0 | 100.0 | 9.793058 | 0.395 | 0.406 | 0.075138 | 15.210501 | 14.667500 |
In [57]:
# Find values when the 'precipitation_probability' is 50%
df1hpp50 = df1.where(df1['precipitation_probability'] == 50).sort_values(by = ['cloud_cover_mid'], ascending=[False])
df1hpp50 = df1hpp50.dropna()
df1hpp50[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_mid','cloud_cover_low',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
Out[57]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_mid | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2055 | 2025-05-19 15:00:00+00:00 | 20.0025 | 96.0 | 19.344767 | 50.0 | 99.0 | 100.0 | 99.0 | 6.379216 | 0.262 | 0.263 | 0.261 | 0.093540 | 20.160500 | 27.417501 |
| 699 | 2025-03-24 03:00:00+00:00 | 17.2525 | 90.0 | 15.596647 | 50.0 | 98.0 | 99.0 | 4.0 | 16.595179 | 0.381 | 0.381 | 0.393 | 0.196987 | 15.910501 | 15.917500 |
| 1897 | 2025-05-13 01:00:00+00:00 | 19.1525 | 91.0 | 17.647644 | 50.0 | 95.0 | 7.0 | 99.0 | 2.741678 | 0.315 | 0.317 | 0.322 | 0.199684 | 20.010500 | 19.617500 |
| 1228 | 2025-04-15 04:00:00+00:00 | 17.7025 | 64.0 | 10.806604 | 50.0 | 61.0 | 100.0 | 0.0 | 15.188417 | 0.358 | 0.307 | 0.226 | 0.729761 | 17.610500 | 17.017500 |
In [58]:
# Find values when the 'precipitation_probability' is Minimum
precipitation_probability_min = df1['precipitation_probability'].min()
print(precipitation_probability_min)
df1hppmin = df1.where(df1['precipitation_probability'] == precipitation_probability_min).sort_values(by = ['cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True]).head(5)
df1hppmin = df1hppmin.dropna()
df1hppmin[['date','temperature_2m','precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm',
'soil_moisture_3_to_9cm','relative_humidity_2m','cloud_cover_high','dew_point_2m','vapour_pressure_deficit',
'cloud_cover_low','wind_speed_10m','soil_moisture_1_to_3cm','temperature_80m','soil_temperature_0cm']]
0.0
Out[58]:
| date | temperature_2m | precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_3_to_9cm | relative_humidity_2m | cloud_cover_high | dew_point_2m | vapour_pressure_deficit | cloud_cover_low | wind_speed_10m | soil_moisture_1_to_3cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1374 | 2025-04-21 06:00:00+00:00 | 22.3525 | 0.0 | 0.0 | 0.149 | 0.195 | 58.0 | 9.0 | 13.688183 | 1.134731 | 0.0 | 17.771753 | 0.168 | 21.3605 | 20.267500 |
| 1373 | 2025-04-21 05:00:00+00:00 | 22.5025 | 0.0 | 0.0 | 0.149 | 0.196 | 53.0 | 0.0 | 12.447384 | 1.281522 | 0.0 | 13.441071 | 0.168 | 21.4605 | 19.817501 |
| 1341 | 2025-04-19 21:00:00+00:00 | 27.7025 | 0.0 | 0.0 | 0.161 | 0.202 | 45.0 | 98.0 | 14.694766 | 2.042824 | 5.0 | 10.799999 | 0.179 | 25.6105 | 31.167501 |
| 1340 | 2025-04-19 20:00:00+00:00 | 28.1025 | 0.0 | 0.0 | 0.163 | 0.203 | 46.0 | 63.0 | 15.399475 | 2.052846 | 27.0 | 12.522619 | 0.180 | 25.6605 | 32.467500 |
| 1322 | 2025-04-19 02:00:00+00:00 | 25.0025 | 0.0 | 0.0 | 0.169 | 0.210 | 36.0 | 0.0 | 8.896405 | 2.028199 | 0.0 | 7.628263 | 0.186 | 22.6605 | 20.667501 |
In [59]:
# Try to find the other Columns value when the 'relative_humidity_2m' is maximum
relative_humidity_2m_max = df1['relative_humidity_2m'].max()
print(relative_humidity_2m_max)
df1hrh2mmax = df1.where(df1['relative_humidity_2m'] == relative_humidity_2m_max)
df1hrh2mmax = df1hrh2mmax.dropna()
df1hrh2mmax[['date','relative_humidity_2m','temperature_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']].sort_values(by='precipitation_probability', ascending=False)
100.0
Out[59]:
| date | relative_humidity_2m | temperature_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1016 | 2025-04-06 08:00:00+00:00 | 100.0 | 15.8025 | 15.802500 | 99.0 | 61.0 | 98.0 | 100.0 | 15.696165 | 0.427 | 0.432 | 0.440 | 0.000000e+00 | 15.810500 | 15.867499 |
| 1010 | 2025-04-06 02:00:00+00:00 | 100.0 | 16.1025 | 16.102500 | 87.0 | 100.0 | 100.0 | 98.0 | 3.396233 | 0.400 | 0.402 | 0.361 | 0.000000e+00 | 13.610500 | 20.617500 |
| 951 | 2025-04-03 15:00:00+00:00 | 100.0 | 18.9525 | 18.952500 | 86.0 | 100.0 | 76.0 | 0.0 | 2.305125 | 0.377 | 0.376 | 0.377 | 0.000000e+00 | 17.460500 | 21.717500 |
| 1009 | 2025-04-06 01:00:00+00:00 | 100.0 | 17.1525 | 17.152498 | 72.0 | 100.0 | 100.0 | 98.0 | 6.479999 | 0.332 | 0.306 | 0.294 | 1.192093e-07 | 12.910501 | 21.717500 |
| 952 | 2025-04-03 16:00:00+00:00 | 100.0 | 19.5025 | 19.502502 | 57.0 | 100.0 | 95.0 | 0.0 | 5.815978 | 0.339 | 0.350 | 0.362 | 0.000000e+00 | 18.010500 | 23.867500 |
| 2080 | 2025-05-20 16:00:00+00:00 | 100.0 | 22.1025 | 22.102500 | 45.0 | 100.0 | 99.0 | 99.0 | 8.404285 | 0.295 | 0.295 | 0.288 | 0.000000e+00 | 23.660500 | 24.317501 |
| 1671 | 2025-05-03 15:00:00+00:00 | 100.0 | 16.3525 | 16.352500 | 37.0 | 100.0 | 100.0 | 97.0 | 13.217443 | 0.347 | 0.350 | 0.356 | 0.000000e+00 | 16.510500 | 17.417501 |
| 1457 | 2025-04-24 17:00:00+00:00 | 100.0 | 19.5025 | 19.502502 | 35.0 | 100.0 | 100.0 | 58.0 | 1.484318 | 0.306 | 0.308 | 0.305 | 0.000000e+00 | 17.560501 | 28.117500 |
| 1741 | 2025-05-06 13:00:00+00:00 | 100.0 | 11.9525 | 11.952499 | 0.0 | 100.0 | 0.0 | 0.0 | 3.096837 | 0.297 | 0.299 | 0.303 | 1.192093e-07 | 11.710501 | 14.967500 |
| 1790 | 2025-05-08 14:00:00+00:00 | 100.0 | 18.6525 | 18.652498 | 0.0 | 100.0 | 0.0 | 0.0 | 0.509117 | 0.306 | 0.308 | 0.311 | 4.768372e-07 | 18.010500 | 22.417501 |
| 1932 | 2025-05-14 12:00:00+00:00 | 100.0 | 16.7525 | 16.752500 | 0.0 | 100.0 | 0.0 | 0.0 | 5.804825 | 0.300 | 0.302 | 0.307 | 0.000000e+00 | 16.510500 | 18.467500 |
| 1934 | 2025-05-14 14:00:00+00:00 | 100.0 | 19.1525 | 19.152498 | 0.0 | 100.0 | 0.0 | 0.0 | 8.404285 | 0.297 | 0.300 | 0.306 | 2.384186e-07 | 19.010500 | 23.767500 |
In [60]:
# Conclusion: when 'relative_humidity_2m' reaches maximum, the 'precipitation_probability' is one of the highest also.
# This concurs to common logic, in contrary to condition in Las Vegas
# (precipitation_probability is extremely low when the 'relative_humidity_2m' reaches maximum).
In [61]:
# Find values when the 'relative_humidity_2m' is 50%
df1hrh50 = df1.where(df1['relative_humidity_2m'] == 50).sort_values(by = ['precipitation_probability'], ascending=[False])
df1hrh50 = df1hrh50.dropna()
df1hrh50[['date','relative_humidity_2m','temperature_2m','vapour_pressure_deficit','dew_point_2m','precipitation_probability','cloud_cover_mid','cloud_cover_low',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','temperature_80m','soil_temperature_0cm']]
Out[61]:
| date | relative_humidity_2m | temperature_2m | vapour_pressure_deficit | dew_point_2m | precipitation_probability | cloud_cover_mid | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1227 | 2025-04-15 03:00:00+00:00 | 50.0 | 23.252500 | 1.426596 | 12.251946 | 39.0 | 0.0 | 97.0 | 0.0 | 2.305125 | 0.307 | 0.210 | 0.216 | 18.810501 | 18.417501 |
| 1439 | 2025-04-23 23:00:00+00:00 | 50.0 | 26.402500 | 1.720850 | 15.145651 | 19.0 | 13.0 | 16.0 | 99.0 | 10.137692 | 0.267 | 0.249 | 0.260 | 23.460500 | 24.867500 |
| 1225 | 2025-04-15 01:00:00+00:00 | 50.0 | 25.702500 | 1.651252 | 14.502851 | 11.0 | 0.0 | 10.0 | 0.0 | 12.261158 | 0.177 | 0.193 | 0.216 | 21.960500 | 22.467500 |
| 832 | 2025-03-29 16:00:00+00:00 | 50.0 | 22.152500 | 1.334735 | 11.240775 | 4.0 | 100.0 | 0.0 | 98.0 | 23.675303 | 0.187 | 0.196 | 0.214 | 19.960500 | 23.017500 |
| 1943 | 2025-05-14 23:00:00+00:00 | 50.0 | 28.052500 | 1.895097 | 16.660276 | 3.0 | 42.0 | 100.0 | 0.0 | 17.819090 | 0.306 | 0.308 | 0.308 | 25.210500 | 26.417501 |
| 1941 | 2025-05-14 21:00:00+00:00 | 50.0 | 28.052500 | 1.895097 | 16.660276 | 2.0 | 3.0 | 8.0 | 0.0 | 17.057314 | 0.346 | 0.295 | 0.298 | 25.860500 | 26.967500 |
| 827 | 2025-03-29 11:00:00+00:00 | 50.0 | 20.002500 | 1.169993 | 9.263409 | 1.0 | 15.0 | 0.0 | 76.0 | 9.255571 | 0.188 | 0.197 | 0.216 | 17.860500 | 16.467500 |
| 2036 | 2025-05-18 20:00:00+00:00 | 50.0 | 27.102500 | 1.792990 | 15.788316 | 1.0 | 7.0 | 8.0 | 17.0 | 5.052841 | 0.271 | 0.276 | 0.285 | 25.260500 | 27.917501 |
| 1216 | 2025-04-14 16:00:00+00:00 | 50.0 | 25.952500 | 1.675822 | 14.732438 | 0.0 | 2.0 | 0.0 | 19.0 | 17.974781 | 0.200 | 0.209 | 0.227 | 21.760500 | 29.017500 |
| 1307 | 2025-04-18 11:00:00+00:00 | 50.0 | 18.552500 | 1.069192 | 7.929086 | 0.0 | 99.0 | 6.0 | 0.0 | 15.379206 | 0.190 | 0.200 | 0.220 | 16.610500 | 15.167500 |
| 1206 | 2025-04-14 06:00:00+00:00 | 50.0 | 17.652500 | 1.010522 | 7.100584 | 0.0 | 0.0 | 0.0 | 0.0 | 7.421590 | 0.201 | 0.211 | 0.231 | 17.760500 | 15.917500 |
| 1412 | 2025-04-22 20:00:00+00:00 | 50.0 | 24.852500 | 1.570049 | 13.722118 | 0.0 | 0.0 | 33.0 | 5.0 | 5.937272 | 0.249 | 0.262 | 0.277 | 21.810501 | 28.117500 |
| 1414 | 2025-04-22 22:00:00+00:00 | 50.0 | 24.102500 | 1.501311 | 13.033067 | 0.0 | 0.0 | 93.0 | 95.0 | 4.452954 | 0.241 | 0.254 | 0.272 | 21.660500 | 25.517500 |
| 1085 | 2025-04-09 05:00:00+00:00 | 50.0 | 4.002500 | 0.407752 | -5.493462 | 0.0 | 0.0 | 0.0 | 0.0 | 3.600000 | 0.241 | 0.247 | 0.264 | 4.510500 | 1.817500 |
| 1611 | 2025-05-01 03:00:00+00:00 | 50.0 | 24.602500 | 1.546837 | 13.492453 | 0.0 | 76.0 | 0.0 | 64.0 | 6.130579 | 0.152 | 0.173 | 0.205 | 24.110500 | 22.417501 |
| 1746 | 2025-05-06 18:00:00+00:00 | 50.0 | 21.352500 | 1.271212 | 10.505167 | 0.0 | 0.0 | 5.0 | 0.0 | 1.609969 | 0.283 | 0.287 | 0.296 | 18.860500 | 26.267500 |
| 1057 | 2025-04-08 01:00:00+00:00 | 50.0 | 11.002501 | 0.657563 | 0.971700 | 0.0 | 0.0 | 0.0 | 0.0 | 5.154415 | 0.259 | 0.267 | 0.284 | 11.060500 | 8.367499 |
| 914 | 2025-04-02 02:00:00+00:00 | 50.0 | 15.502501 | 0.881633 | 5.120452 | 0.0 | 0.0 | 0.0 | 0.0 | 4.843305 | 0.241 | 0.248 | 0.262 | 16.260500 | 12.117499 |
| 2019 | 2025-05-18 03:00:00+00:00 | 50.0 | 22.002500 | 1.322617 | 11.102864 | 0.0 | 0.0 | 0.0 | 0.0 | 4.334974 | 0.282 | 0.285 | 0.293 | 22.410500 | 20.117500 |
| 1343 | 2025-04-19 23:00:00+00:00 | 50.0 | 26.852500 | 1.766929 | 15.558811 | 0.0 | 98.0 | 0.0 | 98.0 | 8.404284 | 0.159 | 0.177 | 0.201 | 24.460500 | 26.217500 |
In [62]:
# Conclusion: when 'relative_humidity_2m' reaches 50%, the highest 'precipitation_probability' is 39%.
In [63]:
# Try to find the other Columns value when the 'relative_humidity_2m' is minimum
relative_humidity_2m_min = df1['relative_humidity_2m'].min()
print(relative_humidity_2m_min)
df1hrh2mmin = df1.where(df1['relative_humidity_2m'] == relative_humidity_2m_min)
df1hrh2mmin = df1hrh2mmin.dropna()
df1hrh2mmin[['date','relative_humidity_2m','temperature_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']].sort_values(by='precipitation_probability', ascending=True)
19.0
Out[63]:
| date | relative_humidity_2m | temperature_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1269 | 2025-04-16 21:00:00+00:00 | 19.0 | 21.5525 | -3.064591 | 0.0 | 0.0 | 0.0 | 0.0 | 6.369050 | 0.217 | 0.232 | 0.246 | 2.085398 | 17.6605 | 24.8675 |
| 1270 | 2025-04-16 22:00:00+00:00 | 19.0 | 21.5525 | -3.064591 | 0.0 | 0.0 | 0.0 | 37.0 | 5.400000 | 0.215 | 0.229 | 0.244 | 2.085398 | 17.5105 | 22.7175 |
| 1271 | 2025-04-16 23:00:00+00:00 | 19.0 | 20.8025 | -3.681689 | 0.0 | 0.0 | 0.0 | 24.0 | 5.315336 | 0.213 | 0.227 | 0.243 | 1.991730 | 17.2605 | 19.6175 |
In [64]:
# Conclusion: when the 'relative_humidity_2m' reaches minimum (19%), the 'precipitation_probability' reaches minimum also (0%).
In [65]:
# Find Daily Average of Precipitation Probability (Table)
dfda1 = df1.groupby('ymd')[df1h.columns[0:]].mean().sort_values('precipitation_probability', ascending=False).head(5)
dfda1
Out[65]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-04-06 | 54.833333 | 93.708333 | 0.352958 | 0.354542 | 96.041667 | 13.331632 | 0.061971 | 13.954583 | 74.041667 | 96.625000 | 8.261871 | 0.358708 | 13.395917 | 15.734167 |
| 2025-04-03 | 51.375000 | 81.875000 | 0.298792 | 0.300750 | 82.125000 | 17.717496 | 0.574346 | 21.369167 | 76.250000 | 71.541667 | 14.200839 | 0.309208 | 20.304250 | 22.555000 |
| 2025-05-03 | 31.291667 | 91.291667 | 0.335875 | 0.339000 | 91.250000 | 15.129816 | 0.165439 | 16.571250 | 89.041667 | 63.166667 | 8.347819 | 0.340208 | 15.714667 | 18.230000 |
| 2025-03-31 | 27.500000 | 49.291667 | 0.309292 | 0.311667 | 87.791667 | 13.269716 | 0.212106 | 15.308750 | 62.000000 | 78.166667 | 10.745450 | 0.308917 | 14.941750 | 17.171667 |
| 2025-05-12 | 26.791667 | 58.250000 | 0.341292 | 0.339500 | 83.166667 | 17.631228 | 0.468887 | 20.823333 | 71.666667 | 74.041667 | 8.622397 | 0.335333 | 19.356333 | 21.830000 |
In [66]:
# Find Daily Average of Precipitation Probability (Plot)
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20, 10) # Set global size for all plots
dfda2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values('ymd', ascending=True)
dfda2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[66]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [67]:
# Find Daily Average of Relative Humidity 2m (Table)
dfda3 = df1.groupby('ymd')[df1h.columns[[4,0]]].mean().sort_values('relative_humidity_2m', ascending=False).head(10)
dfda3
Out[67]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-04-06 | 96.041667 | 54.833333 |
| 2025-05-03 | 91.250000 | 31.291667 |
| 2025-04-24 | 90.541667 | 21.375000 |
| 2025-05-08 | 89.083333 | 11.708333 |
| 2025-04-25 | 88.916667 | 24.041667 |
| 2025-05-05 | 88.416667 | 6.333333 |
| 2025-05-04 | 88.041667 | 9.750000 |
| 2025-03-31 | 87.791667 | 27.500000 |
| 2025-05-20 | 87.583333 | 19.458333 |
| 2025-05-09 | 87.458333 | 3.166667 |
In [68]:
# Find Monthly Average values of Precipitation Probability (Table)
dfma1 = df1.groupby(['year','month'])[df1h.columns[0:]].mean().sort_values(by = ['precipitation_probability','cloud_cover_mid',
'soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m','vapour_pressure_deficit'],
ascending=[False,False,False,False,False,False,False,False,False,False])
dfma1
Out[68]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 05 | 8.831731 | 33.673077 | 0.287492 | 0.289117 | 74.913462 | 14.547957 | 0.671594 | 19.638878 | 39.136218 | 42.022436 | 8.303039 | 0.292835 | 18.513705 | 20.397869 |
| 04 | 8.218056 | 28.886111 | 0.253883 | 0.259607 | 66.851389 | 10.539709 | 0.786940 | 17.725695 | 30.741667 | 33.441667 | 9.382421 | 0.269937 | 16.591750 | 18.325139 | |
| 03 | 5.859935 | 20.283388 | 0.243567 | 0.247840 | 54.768730 | 5.096467 | 0.880913 | 15.285845 | 18.687296 | 26.322476 | 11.941970 | 0.256166 | 14.110012 | 15.050236 |
In [69]:
# Find Monthly Average values of Precipitation Probability (Plot)
dfma2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values(by = ['year', 'month'], ascending=[True,True])
# dfma2.plot(subplots=[('precipitation_probability', 'soil_moisture_0_to_1cm')])
dfma2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[69]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [70]:
# Find Monthly Average values of Relative Humidity 2m (Table)
dfma3 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].mean().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfma3
Out[70]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 05 | 74.913462 | 8.831731 |
| 04 | 66.851389 | 8.218056 | |
| 03 | 54.768730 | 5.859935 |
In [71]:
# Find Hourly Average values of Precipitation Probability (Table)
dfha1 = df1.groupby(['hmstz'])[df1h.columns[0:]].mean().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfha1
Out[71]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 23:00:00+00:00 | 10.942029 | 28.840580 | 0.263391 | 0.268696 | 56.173913 | 10.882049 | 1.214140 | 21.308254 | 39.695652 | 35.289855 | 12.593755 | 0.274710 | 19.461949 | 21.080544 |
| 00:00:00+00:00 | 10.823529 | 34.941176 | 0.257765 | 0.262956 | 57.529412 | 10.437071 | 1.106596 | 20.361279 | 34.764706 | 29.632353 | 9.932043 | 0.271897 | 19.027412 | 19.146177 |
| 01:00:00+00:00 | 10.647059 | 30.588235 | 0.259250 | 0.263735 | 62.029412 | 10.544955 | 0.886789 | 18.910544 | 32.764706 | 28.338235 | 7.554958 | 0.271809 | 18.243589 | 17.395441 |
| 22:00:00+00:00 | 10.608696 | 29.768116 | 0.259449 | 0.265130 | 55.724638 | 11.495598 | 1.272770 | 22.011877 | 35.724638 | 38.695652 | 12.644852 | 0.274478 | 19.972819 | 22.882718 |
| 02:00:00+00:00 | 9.779412 | 28.205882 | 0.260721 | 0.265397 | 65.823529 | 10.718384 | 0.747160 | 17.927500 | 30.147059 | 23.455882 | 6.705819 | 0.273676 | 17.529618 | 16.485147 |
| 16:00:00+00:00 | 8.652174 | 27.115942 | 0.267812 | 0.271594 | 67.014493 | 12.405831 | 0.796096 | 19.478544 | 27.391304 | 52.550725 | 11.687352 | 0.278406 | 17.241660 | 22.940689 |
| 21:00:00+00:00 | 8.202899 | 31.347826 | 0.261406 | 0.265928 | 53.695652 | 11.403278 | 1.368297 | 22.540862 | 31.623188 | 40.405797 | 13.265757 | 0.275551 | 20.155428 | 24.279819 |
| 03:00:00+00:00 | 8.000000 | 30.205882 | 0.266338 | 0.268471 | 68.205882 | 10.751826 | 0.666444 | 17.269412 | 26.058824 | 25.338235 | 6.056407 | 0.275382 | 16.988441 | 15.791030 |
| 14:00:00+00:00 | 7.985507 | 30.666667 | 0.270246 | 0.272580 | 75.159420 | 11.900150 | 0.493985 | 16.782167 | 31.594203 | 46.028986 | 10.202882 | 0.279275 | 15.242384 | 18.747211 |
| 15:00:00+00:00 | 7.956522 | 30.405797 | 0.269029 | 0.271899 | 70.637681 | 12.203408 | 0.648221 | 18.226370 | 28.942029 | 47.202899 | 11.259874 | 0.279000 | 16.340935 | 21.092863 |
| 17:00:00+00:00 | 7.927536 | 25.231884 | 0.266000 | 0.270580 | 63.217391 | 12.284031 | 0.956532 | 20.465500 | 25.101449 | 45.637681 | 12.593281 | 0.277768 | 18.178616 | 24.290689 |
| 04:00:00+00:00 | 7.602941 | 30.308824 | 0.264985 | 0.268838 | 70.235294 | 10.564269 | 0.587748 | 16.516471 | 29.897059 | 31.102941 | 7.030144 | 0.276765 | 16.259765 | 15.252794 |
| 20:00:00+00:00 | 7.478261 | 25.449275 | 0.260623 | 0.267478 | 55.347826 | 11.879585 | 1.314835 | 22.433616 | 28.115942 | 42.855072 | 13.157123 | 0.276551 | 20.033689 | 25.377645 |
| 12:00:00+00:00 | 7.246377 | 34.782609 | 0.269493 | 0.271333 | 79.797101 | 10.539762 | 0.344758 | 14.289413 | 32.710145 | 44.449275 | 6.996893 | 0.276594 | 13.785863 | 14.058080 |
| 05:00:00+00:00 | 7.246377 | 28.579710 | 0.263609 | 0.267986 | 71.869565 | 10.431895 | 0.536891 | 15.989413 | 37.623188 | 26.246377 | 7.452990 | 0.276101 | 15.760500 | 14.785616 |
| 06:00:00+00:00 | 7.246377 | 25.246377 | 0.264725 | 0.268623 | 72.550725 | 10.280846 | 0.515896 | 15.645210 | 36.130435 | 25.869565 | 8.212435 | 0.276435 | 15.525718 | 14.474022 |
| 07:00:00+00:00 | 7.231884 | 27.057971 | 0.263739 | 0.267594 | 74.115942 | 10.231985 | 0.480893 | 15.270573 | 35.782609 | 16.521739 | 7.834606 | 0.275652 | 15.136587 | 14.150109 |
| 13:00:00+00:00 | 7.217391 | 31.362319 | 0.269696 | 0.272681 | 78.144928 | 11.263409 | 0.403967 | 15.429993 | 28.188406 | 44.086957 | 8.206373 | 0.278145 | 14.300355 | 16.387790 |
| 18:00:00+00:00 | 7.057971 | 30.768116 | 0.265739 | 0.271565 | 59.347826 | 12.180489 | 1.124769 | 21.453906 | 26.362319 | 46.420290 | 12.941720 | 0.277420 | 19.061225 | 25.222573 |
| 19:00:00+00:00 | 6.971014 | 28.231884 | 0.261986 | 0.268797 | 57.144928 | 12.093610 | 1.221855 | 22.024196 | 27.275362 | 46.275362 | 12.942607 | 0.277594 | 19.661950 | 25.821124 |
| 11:00:00+00:00 | 6.913043 | 29.318841 | 0.269942 | 0.271014 | 79.579710 | 10.094400 | 0.340336 | 13.862602 | 33.869565 | 35.463768 | 6.632991 | 0.276087 | 13.633689 | 13.237790 |
| 08:00:00+00:00 | 6.623188 | 31.565217 | 0.263420 | 0.267290 | 75.376812 | 10.176450 | 0.444812 | 14.887964 | 33.884058 | 20.826087 | 7.403544 | 0.275754 | 14.772819 | 13.934167 |
| 09:00:00+00:00 | 6.057971 | 23.231884 | 0.264681 | 0.267116 | 76.318841 | 10.044628 | 0.421411 | 14.601007 | 32.492754 | 26.275362 | 6.954408 | 0.274870 | 14.352529 | 13.645761 |
| 10:00:00+00:00 | 5.956522 | 25.202899 | 0.267768 | 0.268261 | 78.391304 | 10.179737 | 0.367218 | 14.210428 | 33.942029 | 29.130435 | 6.402455 | 0.274667 | 13.947457 | 13.387790 |
In [72]:
# Find Hourly Average values of Precipitation Probability (Plot)
dfha2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values(by = ['hmstz'], ascending=[True])
dfha2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[72]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [73]:
# Find Hourly Average values of Relative Humidity 2m (Table)
dfha3 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].mean().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfha3
Out[73]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 12:00:00+00:00 | 79.797101 | 7.246377 |
| 11:00:00+00:00 | 79.579710 | 6.913043 |
| 10:00:00+00:00 | 78.391304 | 5.956522 |
| 13:00:00+00:00 | 78.144928 | 7.217391 |
| 09:00:00+00:00 | 76.318841 | 6.057971 |
| 08:00:00+00:00 | 75.376812 | 6.623188 |
| 14:00:00+00:00 | 75.159420 | 7.985507 |
| 07:00:00+00:00 | 74.115942 | 7.231884 |
| 06:00:00+00:00 | 72.550725 | 7.246377 |
| 05:00:00+00:00 | 71.869565 | 7.246377 |
| 15:00:00+00:00 | 70.637681 | 7.956522 |
| 04:00:00+00:00 | 70.235294 | 7.602941 |
| 03:00:00+00:00 | 68.205882 | 8.000000 |
| 16:00:00+00:00 | 67.014493 | 8.652174 |
| 02:00:00+00:00 | 65.823529 | 9.779412 |
| 17:00:00+00:00 | 63.217391 | 7.927536 |
| 01:00:00+00:00 | 62.029412 | 10.647059 |
| 18:00:00+00:00 | 59.347826 | 7.057971 |
| 00:00:00+00:00 | 57.529412 | 10.823529 |
| 19:00:00+00:00 | 57.144928 | 6.971014 |
| 23:00:00+00:00 | 56.173913 | 10.942029 |
| 22:00:00+00:00 | 55.724638 | 10.608696 |
| 20:00:00+00:00 | 55.347826 | 7.478261 |
| 21:00:00+00:00 | 53.695652 | 8.202899 |
In [74]:
# Observation Result:
# - The fluctuation of average relative humidity and soil moisture values is relatively similar. Maybe, it's due to partial condensation
# phenomenon, so when the precipitation probability is relatively stable (almost flat), the soil moisture is still fluctuating.
# This phenomenon is used in Fog Farming to supply fresh water in arid/semi-arid area by using a lot of nets to help water vapour condensation.
In [75]:
# Find Daily Maximum of Precipitation Probability (Table)
dfdmax1 = df1.groupby('ymd')[df1h.columns[0:]].max().sort_values('precipitation_probability', ascending=False).head(5)
dfdmax1
Out[75]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-04-06 | 100.0 | 100.0 | 0.427 | 0.432 | 100.0 | 17.785336 | 0.115238 | 18.6025 | 100.0 | 100.0 | 16.800856 | 0.440 | 19.6105 | 23.067501 |
| 2025-04-03 | 97.0 | 100.0 | 0.423 | 0.407 | 100.0 | 21.519682 | 2.017349 | 27.8025 | 100.0 | 100.0 | 30.248568 | 0.403 | 25.4605 | 25.167501 |
| 2025-05-21 | 97.0 | 100.0 | 0.402 | 0.401 | 96.0 | 19.394514 | 2.361591 | 26.8025 | 98.0 | 100.0 | 26.319422 | 0.384 | 23.1605 | 29.867500 |
| 2025-05-03 | 93.0 | 100.0 | 0.400 | 0.404 | 100.0 | 16.352500 | 0.292116 | 17.6525 | 100.0 | 100.0 | 21.862406 | 0.388 | 16.5105 | 21.867500 |
| 2025-03-31 | 90.0 | 100.0 | 0.390 | 0.393 | 96.0 | 18.642853 | 0.446352 | 20.9525 | 100.0 | 100.0 | 18.723460 | 0.363 | 20.8605 | 20.367500 |
In [76]:
# Find Daily Maximum of Precipitation Probability (Plot)
dfdmax2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values('ymd', ascending=True)
dfdmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[76]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [77]:
# Find Daily Maximum of Relative Humidity 2m (Table)
dfdmax2 = df1.groupby('ymd')[df1h.columns[[4,0]]].max().sort_values('relative_humidity_2m', ascending=False).head(10)
dfdmax2
Out[77]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-04-03 | 100.0 | 97.0 |
| 2025-05-08 | 100.0 | 59.0 |
| 2025-04-24 | 100.0 | 60.0 |
| 2025-05-03 | 100.0 | 93.0 |
| 2025-05-20 | 100.0 | 69.0 |
| 2025-04-06 | 100.0 | 100.0 |
| 2025-05-14 | 100.0 | 20.0 |
| 2025-05-06 | 100.0 | 1.0 |
| 2025-04-05 | 99.0 | 34.0 |
| 2025-05-13 | 99.0 | 50.0 |
In [78]:
# Find Monthly Maximum values of Precipitation Probability (Table)
dfmmax1 = df1.groupby(['year','month'])[df1h.columns[0:]].max().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmmax1
Out[78]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 04 | 100.0 | 100.0 | 0.427 | 0.432 | 100.0 | 22.277930 | 2.821701 | 30.6025 | 100.0 | 100.0 | 30.633865 | 0.440 | 27.560501 | 34.967500 |
| 05 | 97.0 | 100.0 | 0.402 | 0.416 | 100.0 | 23.446577 | 3.207373 | 30.8525 | 100.0 | 100.0 | 27.103000 | 0.388 | 28.810501 | 34.367500 | |
| 03 | 90.0 | 100.0 | 0.399 | 0.402 | 96.0 | 19.002602 | 2.434383 | 26.8525 | 100.0 | 100.0 | 30.018473 | 0.393 | 23.760500 | 29.317501 |
In [79]:
# Find Monthly Maximum values of Precipitation Probability (Plot)
dfmmax2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values(by = ['year','month'], ascending=[True,True])
dfmmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[79]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [80]:
# Find Monthly Maximum values of Relative Humidity 2m (Table)
dfmmax2 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].max().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfmmax2
Out[80]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 04 | 100.0 | 100.0 |
| 05 | 100.0 | 97.0 | |
| 03 | 96.0 | 90.0 |
In [81]:
# Find Hourly Maximum values of Precipitation Probability (Table)
dfhmax1 = df1.groupby(['hmstz'])[df1h.columns[0:]].max().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfhmax1
Out[81]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 06:00:00+00:00 | 100.0 | 100.0 | 0.417 | 0.416 | 97.0 | 19.825928 | 1.557914 | 25.0025 | 100.0 | 100.0 | 29.522196 | 0.412 | 25.010500 | 23.217500 |
| 07:00:00+00:00 | 100.0 | 100.0 | 0.406 | 0.404 | 99.0 | 19.963972 | 1.581724 | 25.8525 | 100.0 | 100.0 | 21.093050 | 0.398 | 25.210500 | 22.867500 |
| 05:00:00+00:00 | 100.0 | 100.0 | 0.396 | 0.393 | 96.0 | 19.922579 | 1.605850 | 26.0525 | 100.0 | 100.0 | 19.708150 | 0.393 | 23.860500 | 23.767500 |
| 09:00:00+00:00 | 100.0 | 100.0 | 0.390 | 0.395 | 97.0 | 20.332212 | 1.380920 | 24.8525 | 100.0 | 100.0 | 18.643690 | 0.406 | 24.510500 | 22.717500 |
| 08:00:00+00:00 | 99.0 | 100.0 | 0.427 | 0.432 | 100.0 | 19.970184 | 1.422268 | 25.1525 | 100.0 | 100.0 | 19.386593 | 0.440 | 24.960500 | 22.867500 |
| 00:00:00+00:00 | 97.0 | 100.0 | 0.402 | 0.401 | 97.0 | 20.266685 | 2.786320 | 28.8025 | 100.0 | 100.0 | 27.345785 | 0.384 | 25.910500 | 26.517500 |
| 11:00:00+00:00 | 97.0 | 100.0 | 0.378 | 0.385 | 98.0 | 20.572153 | 1.169993 | 24.4525 | 100.0 | 100.0 | 18.228943 | 0.362 | 23.960500 | 22.367500 |
| 03:00:00+00:00 | 95.0 | 100.0 | 0.398 | 0.396 | 95.0 | 20.345190 | 1.658432 | 27.5025 | 100.0 | 100.0 | 24.944130 | 0.394 | 25.160500 | 23.517500 |
| 18:00:00+00:00 | 93.0 | 100.0 | 0.386 | 0.416 | 98.0 | 21.859500 | 2.530884 | 29.8025 | 100.0 | 100.0 | 30.633865 | 0.372 | 26.960500 | 34.367500 |
| 14:00:00+00:00 | 92.0 | 100.0 | 0.413 | 0.411 | 100.0 | 21.660275 | 1.450493 | 25.1525 | 100.0 | 100.0 | 27.059933 | 0.381 | 23.910500 | 26.667501 |
| 17:00:00+00:00 | 92.0 | 100.0 | 0.375 | 0.376 | 100.0 | 22.353073 | 2.293195 | 28.6525 | 100.0 | 100.0 | 28.089230 | 0.387 | 25.810501 | 33.967500 |
| 12:00:00+00:00 | 91.0 | 100.0 | 0.383 | 0.384 | 100.0 | 20.832233 | 1.030431 | 24.3025 | 100.0 | 100.0 | 19.416653 | 0.377 | 23.860500 | 22.917501 |
| 04:00:00+00:00 | 90.0 | 100.0 | 0.394 | 0.391 | 97.0 | 21.105192 | 2.017349 | 27.8025 | 100.0 | 100.0 | 30.248568 | 0.393 | 24.410500 | 23.817501 |
| 10:00:00+00:00 | 90.0 | 100.0 | 0.377 | 0.362 | 98.0 | 20.350441 | 1.253686 | 24.6525 | 100.0 | 100.0 | 18.940240 | 0.376 | 24.360500 | 22.567501 |
| 13:00:00+00:00 | 89.0 | 100.0 | 0.401 | 0.393 | 100.0 | 21.262709 | 1.208841 | 24.9525 | 100.0 | 100.0 | 22.702845 | 0.380 | 23.760500 | 24.367500 |
| 02:00:00+00:00 | 87.0 | 100.0 | 0.400 | 0.402 | 100.0 | 20.394680 | 2.028199 | 27.0525 | 100.0 | 100.0 | 23.933908 | 0.383 | 25.460500 | 24.967500 |
| 15:00:00+00:00 | 86.0 | 100.0 | 0.406 | 0.405 | 100.0 | 21.586784 | 1.704031 | 26.6025 | 100.0 | 100.0 | 27.397867 | 0.419 | 24.010500 | 29.167501 |
| 19:00:00+00:00 | 86.0 | 100.0 | 0.384 | 0.386 | 96.0 | 23.203056 | 2.943742 | 30.4025 | 100.0 | 100.0 | 28.916763 | 0.384 | 28.810501 | 34.967500 |
| 22:00:00+00:00 | 86.0 | 100.0 | 0.373 | 0.353 | 98.0 | 22.441195 | 2.861490 | 29.8025 | 100.0 | 100.0 | 28.467329 | 0.366 | 27.860500 | 31.117500 |
| 01:00:00+00:00 | 72.0 | 100.0 | 0.386 | 0.371 | 100.0 | 20.926455 | 1.903188 | 27.3525 | 100.0 | 100.0 | 26.405453 | 0.376 | 25.610500 | 25.217500 |
| 20:00:00+00:00 | 71.0 | 100.0 | 0.423 | 0.407 | 96.0 | 22.576803 | 3.064757 | 30.8025 | 100.0 | 100.0 | 30.018473 | 0.403 | 28.760500 | 34.917500 |
| 16:00:00+00:00 | 70.0 | 100.0 | 0.400 | 0.404 | 100.0 | 23.446577 | 1.884407 | 27.8525 | 100.0 | 100.0 | 25.562534 | 0.388 | 24.910500 | 31.767500 |
| 21:00:00+00:00 | 64.0 | 100.0 | 0.383 | 0.385 | 96.0 | 22.452906 | 3.207373 | 30.8525 | 100.0 | 100.0 | 28.241076 | 0.394 | 28.460500 | 32.667500 |
| 23:00:00+00:00 | 59.0 | 100.0 | 0.395 | 0.385 | 95.0 | 21.758093 | 2.943496 | 29.5525 | 100.0 | 100.0 | 28.241076 | 0.366 | 26.210500 | 29.067501 |
In [82]:
# Find Hourly Maximum values of Precipitation Probability (Plot)
dfhmax2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values(by = ['hmstz'], ascending=[True])
dfhmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[82]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [83]:
# Find Hourly Maximum values of Relative Humidity 2m (Table)
dfhmax2 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].max().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfhmax2
Out[83]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 12:00:00+00:00 | 100.0 | 91.0 |
| 14:00:00+00:00 | 100.0 | 92.0 |
| 02:00:00+00:00 | 100.0 | 87.0 |
| 17:00:00+00:00 | 100.0 | 92.0 |
| 16:00:00+00:00 | 100.0 | 70.0 |
| 08:00:00+00:00 | 100.0 | 99.0 |
| 15:00:00+00:00 | 100.0 | 86.0 |
| 01:00:00+00:00 | 100.0 | 72.0 |
| 13:00:00+00:00 | 100.0 | 89.0 |
| 07:00:00+00:00 | 99.0 | 100.0 |
| 22:00:00+00:00 | 98.0 | 86.0 |
| 18:00:00+00:00 | 98.0 | 93.0 |
| 10:00:00+00:00 | 98.0 | 90.0 |
| 11:00:00+00:00 | 98.0 | 97.0 |
| 00:00:00+00:00 | 97.0 | 97.0 |
| 09:00:00+00:00 | 97.0 | 100.0 |
| 06:00:00+00:00 | 97.0 | 100.0 |
| 04:00:00+00:00 | 97.0 | 90.0 |
| 05:00:00+00:00 | 96.0 | 100.0 |
| 19:00:00+00:00 | 96.0 | 86.0 |
| 20:00:00+00:00 | 96.0 | 71.0 |
| 21:00:00+00:00 | 96.0 | 64.0 |
| 03:00:00+00:00 | 95.0 | 95.0 |
| 23:00:00+00:00 | 95.0 | 59.0 |
In [84]:
# Observation Result:
# - When the maximum value of 'relative_humidity_2m' and 'precipitation_probability' decreases, the maximum value of moisture rises. Maybe, it's due to
# the occurence of rain (humidity falls due to part of water vapour turns to liquid, and after that, the precipitation_probability falls also because
# it's already release the water vapour as rain).
In [85]:
# Find Daily Minimum of Precipitation Probability (Table)
dfdmin1 = df1.groupby('ymd')[df1h.columns[0:]].min().sort_values(['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm'], ascending=True).head(5)
dfdmin1
Out[85]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-04-21 | 0.0 | 0.0 | 0.148 | 0.167 | 40.0 | 12.389624 | 0.043685 | 18.5025 | 0.0 | 0.0 | 5.937272 | 0.195 | 17.4105 | 18.467500 |
| 2025-05-01 | 0.0 | 0.0 | 0.149 | 0.173 | 47.0 | 13.492453 | 0.634595 | 22.0025 | 0.0 | 0.0 | 5.091168 | 0.201 | 19.2105 | 20.417501 |
| 2025-04-20 | 0.0 | 0.0 | 0.154 | 0.176 | 38.0 | 13.304026 | 0.275796 | 17.2525 | 2.0 | 0.0 | 0.804984 | 0.201 | 18.9605 | 15.717500 |
| 2025-04-19 | 0.0 | 0.0 | 0.159 | 0.177 | 36.0 | 7.743205 | 0.634993 | 20.0025 | 0.0 | 0.0 | 4.452954 | 0.201 | 18.2105 | 16.917501 |
| 2025-04-18 | 0.0 | 0.0 | 0.171 | 0.189 | 26.0 | 3.101421 | 0.959298 | 18.1525 | 0.0 | 0.0 | 4.024922 | 0.212 | 16.6105 | 15.117499 |
In [86]:
# Find Daily Minimum of Precipitation Probability (Plot)
dfdmin2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values('ymd', ascending=True)
dfdmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[86]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [87]:
# Find Daily Minimum of relative_humidity_2m (Table)
dfdmin3 = df1.groupby('ymd')[df1h.columns[[4,0]]].min().sort_values(['relative_humidity_2m'], ascending=True).head(5)
dfdmin3
Out[87]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-04-16 | 19.0 | 0.0 |
| 2025-04-10 | 20.0 | 0.0 |
| 2025-04-09 | 21.0 | 0.0 |
| 2025-04-17 | 21.0 | 0.0 |
| 2025-03-21 | 22.0 | 0.0 |
In [88]:
# Observation Result of Daily Minimum Data
# - Fluctuation of 'relative_humidity_2m' is similar to 'soil_moisture_0_to_1cm', different compared to Hourly Minimum Data,
# maybe due to longer time scope.
In [89]:
# Find Monthly Minimum values of Precipitation Probability (Table)
dfmmin1 = df1.groupby(['year','month'])[df1h.columns[0:]].min().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True,True,True])
dfmmin1
Out[89]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 04 | 0.0 | 0.0 | 0.148 | 0.167 | 19.0 | -7.360636 | 0.000000 | 0.9025 | 0.0 | 0.0 | 0.360000 | 0.195 | 0.7605 | 0.3675 |
| 05 | 0.0 | 0.0 | 0.149 | 0.173 | 28.0 | 6.472079 | 0.000000 | 8.1525 | 0.0 | 0.0 | 0.360000 | 0.201 | 7.1605 | 7.4675 | |
| 03 | 0.0 | 0.0 | 0.178 | 0.188 | 22.0 | -6.138609 | 0.047987 | -2.7975 | 0.0 | 0.0 | 1.018234 | 0.206 | 2.6605 | -3.0825 |
In [90]:
# Find Monthly Minimum values of Precipitation Probability (Plot)
dfmmin2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values(by = ['year','month'], ascending=[True,True])
dfmmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[90]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [91]:
# Find Monthly Minimum values of Relative Humidity 2m (Table)
dfmmin2 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].min().sort_values(by = ['relative_humidity_2m'], ascending=[True])
dfmmin2
Out[91]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 04 | 19.0 | 0.0 |
| 03 | 22.0 | 0.0 | |
| 05 | 28.0 | 0.0 |
In [92]:
# Find Hourly Minimum values of Precipitation Probability (Table)
dfhmin1 = df1.groupby(['hmstz'])[df1h.columns[0:]].min().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True,True,True])
dfhmin1
Out[92]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 04:00:00+00:00 | 0.0 | 0.0 | 0.148 | 0.168 | 30.0 | -5.247747 | 5.270660e-02 | 3.2525 | 0.0 | 0.0 | 1.138420 | 0.196 | 4.0605 | 2.5175 |
| 03:00:00+00:00 | 0.0 | 0.0 | 0.148 | 0.169 | 31.0 | -5.802887 | 6.925583e-02 | 3.5525 | 0.0 | 0.0 | 0.804984 | 0.196 | 4.6605 | 3.2175 |
| 02:00:00+00:00 | 0.0 | 0.0 | 0.148 | 0.169 | 31.0 | -5.230092 | 0.000000e+00 | 4.2025 | 0.0 | 0.0 | 1.484318 | 0.196 | 5.3105 | 4.1175 |
| 01:00:00+00:00 | 0.0 | 0.0 | 0.148 | 0.170 | 28.0 | -7.360636 | 1.192093e-07 | 5.2025 | 0.0 | 0.0 | 1.297998 | 0.197 | 6.1105 | 5.3675 |
| 00:00:00+00:00 | 0.0 | 0.0 | 0.148 | 0.171 | 21.0 | -6.458873 | 3.803062e-02 | 6.3025 | 0.0 | 0.0 | 0.360000 | 0.197 | 6.3105 | 6.3675 |
| 07:00:00+00:00 | 0.0 | 0.0 | 0.149 | 0.167 | 30.0 | -5.318303 | 1.790810e-02 | -0.0975 | 0.0 | 0.0 | 1.080000 | 0.195 | 3.2605 | 0.1675 |
| 06:00:00+00:00 | 0.0 | 0.0 | 0.149 | 0.168 | 31.0 | -4.884931 | 5.088019e-02 | 1.0025 | 0.0 | 0.0 | 1.938659 | 0.195 | 3.5105 | 0.2675 |
| 05:00:00+00:00 | 0.0 | 0.0 | 0.149 | 0.168 | 31.0 | -5.715442 | 6.960440e-02 | 1.6025 | 0.0 | 0.0 | 1.527351 | 0.196 | 3.6105 | 1.8175 |
| 08:00:00+00:00 | 0.0 | 0.0 | 0.150 | 0.167 | 30.0 | -5.057073 | 0.000000e+00 | -0.6475 | 0.0 | 0.0 | 0.360000 | 0.195 | 2.9605 | 0.0175 |
| 23:00:00+00:00 | 0.0 | 0.0 | 0.153 | 0.176 | 19.0 | -7.046234 | 9.362638e-02 | 6.6025 | 0.0 | 0.0 | 2.545584 | 0.201 | 5.9105 | 7.7675 |
| 22:00:00+00:00 | 0.0 | 0.0 | 0.154 | 0.177 | 19.0 | -5.147153 | 3.865421e-02 | 6.3525 | 0.0 | 0.0 | 0.360000 | 0.202 | 5.0605 | 8.0675 |
| 21:00:00+00:00 | 0.0 | 0.0 | 0.157 | 0.178 | 19.0 | -6.268236 | 5.508649e-02 | 6.3025 | 0.0 | 0.0 | 0.509117 | 0.201 | 4.8105 | 8.9175 |
| 09:00:00+00:00 | 0.0 | 0.0 | 0.158 | 0.168 | 28.0 | -6.138609 | 4.315126e-02 | -1.2975 | 0.0 | 0.0 | 0.360000 | 0.195 | 2.8105 | -0.1325 |
| 10:00:00+00:00 | 0.0 | 0.0 | 0.159 | 0.173 | 32.0 | -5.867736 | 3.780699e-02 | -1.4975 | 0.0 | 0.0 | 1.018234 | 0.195 | 2.5605 | -0.2825 |
| 11:00:00+00:00 | 0.0 | 0.0 | 0.160 | 0.174 | 33.0 | -6.123685 | 3.316426e-02 | -1.9975 | 0.0 | 0.0 | 1.297998 | 0.197 | 1.3105 | -0.4825 |
| 20:00:00+00:00 | 0.0 | 0.0 | 0.160 | 0.174 | 20.0 | -5.321992 | 5.711555e-02 | 5.5025 | 0.0 | 0.0 | 0.360000 | 0.201 | 4.7605 | 8.9175 |
| 14:00:00+00:00 | 0.0 | 0.0 | 0.161 | 0.174 | 33.0 | -3.707861 | 2.384186e-07 | 3.7025 | 0.0 | 0.0 | 0.509117 | 0.204 | 1.6605 | 4.6175 |
| 12:00:00+00:00 | 0.0 | 0.0 | 0.161 | 0.174 | 36.0 | -4.933545 | 0.000000e+00 | -2.7975 | 0.0 | 0.0 | 0.804984 | 0.204 | 0.7605 | -3.0825 |
| 13:00:00+00:00 | 0.0 | 0.0 | 0.162 | 0.174 | 35.0 | -2.786416 | 1.192093e-07 | -0.2975 | 0.0 | 0.0 | 0.720000 | 0.204 | 0.7605 | -0.6325 |
| 19:00:00+00:00 | 0.0 | 0.0 | 0.163 | 0.174 | 21.0 | -5.242775 | 6.885850e-02 | 5.9025 | 0.0 | 0.0 | 3.396233 | 0.202 | 4.5105 | 9.9175 |
| 15:00:00+00:00 | 0.0 | 0.0 | 0.164 | 0.174 | 30.0 | -3.815094 | 0.000000e+00 | 5.2525 | 0.0 | 0.0 | 0.360000 | 0.203 | 2.8105 | 7.3175 |
| 18:00:00+00:00 | 0.0 | 0.0 | 0.167 | 0.174 | 24.0 | -4.048950 | 4.450750e-02 | 5.9525 | 0.0 | 0.0 | 1.609969 | 0.202 | 4.6105 | 9.2675 |
| 17:00:00+00:00 | 0.0 | 0.0 | 0.170 | 0.175 | 23.0 | -4.052588 | 0.000000e+00 | 6.5525 | 0.0 | 0.0 | 0.509117 | 0.202 | 5.0105 | 10.0675 |
| 16:00:00+00:00 | 0.0 | 0.0 | 0.172 | 0.175 | 26.0 | -4.160234 | 0.000000e+00 | 5.4025 | 0.0 | 0.0 | 2.036468 | 0.203 | 4.0105 | 8.4675 |
In [93]:
# Find Hourly Minimum values of Precipitation Probability (Plot)
dfhmin2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values(by = ['hmstz'], ascending=[True])
dfhmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[93]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [94]:
# Find Hourly Minimum values of Relative Humidity 2m (Table)
dfhmin2 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].min().sort_values(by = ['relative_humidity_2m'], ascending=[True])
dfhmin2
Out[94]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 23:00:00+00:00 | 19.0 | 0.0 |
| 21:00:00+00:00 | 19.0 | 0.0 |
| 22:00:00+00:00 | 19.0 | 0.0 |
| 20:00:00+00:00 | 20.0 | 0.0 |
| 19:00:00+00:00 | 21.0 | 0.0 |
| 00:00:00+00:00 | 21.0 | 0.0 |
| 17:00:00+00:00 | 23.0 | 0.0 |
| 18:00:00+00:00 | 24.0 | 0.0 |
| 16:00:00+00:00 | 26.0 | 0.0 |
| 09:00:00+00:00 | 28.0 | 0.0 |
| 01:00:00+00:00 | 28.0 | 0.0 |
| 07:00:00+00:00 | 30.0 | 0.0 |
| 08:00:00+00:00 | 30.0 | 0.0 |
| 04:00:00+00:00 | 30.0 | 0.0 |
| 15:00:00+00:00 | 30.0 | 0.0 |
| 06:00:00+00:00 | 31.0 | 0.0 |
| 02:00:00+00:00 | 31.0 | 0.0 |
| 05:00:00+00:00 | 31.0 | 0.0 |
| 03:00:00+00:00 | 31.0 | 0.0 |
| 10:00:00+00:00 | 32.0 | 0.0 |
| 14:00:00+00:00 | 33.0 | 0.0 |
| 11:00:00+00:00 | 33.0 | 0.0 |
| 13:00:00+00:00 | 35.0 | 0.0 |
| 12:00:00+00:00 | 36.0 | 0.0 |
In [95]:
# Observation result:
# - When the minimum value of 'relative_humidity_2m' rises, the min value of 'soil_moisture_0_to_1cm' doesn't immediately rise, but instead, steadily almost flat. Maybe, it's due to part of liquid water in the soil vaporize and change into water vapour, so increase the relative humidity of air (near the soil).
# - When 'relative_humidity_2m' falls, the value of 'soil_moisture_0_to_1cm' doesn't immediately falls, but instead, in contrary, rises big enough. Maybe, it's due to part of water vapour in the air near the soil condensates and add liquid water in the soil so the soil moisture increases.
Mode value of a Column (for sample, we take 'temperature_2m' Column)¶
In [96]:
# Find Mode value if not rounded
dfmod = df1.mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod = dfmod.dropna()
dfmod
# The result is 17.4525 if we don't do any number rounding. But this result in the most cases, maybe is not the best mode at all.
Out[96]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 581 | 2025-03-19 05:00:00+00:00 | 17.4525 | 92.0 | 15.176124 | 17.777193 | 0.0 | 0.0 | 0.0 | 0.0 | 1017.5 | 990.0348 | 100.0 | 0.0 | 0.0 | 0.0 | 12200.0 | 3.096837 | 0.265 | 0.268 | 0.283 | 0.274 | 0.288 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.311245 | 13.991941 | 17.654688 | 180.0 | 180.0 | 180.0 | 180.0 | 16.1605 | 16.4605 | 17.3675 | 17.9675 | 19.4675 | 17.067501 | 14.1675 | 17.64 | Nashville | 2025-03-20 | 05:00:00+00:00 | 2025 | 04 | 20 | 05 | 00 | 00+00 | 00 |
In [97]:
dfmoda = df1[(df1['temperature_2m'] >= 17.45245) & (df1['temperature_2m'] < 17.45255)] #.where(df1['temperature_2m'] == 19)
dfmoda = dfmoda.dropna()
print(dfmoda['temperature_2m'])
len(dfmoda)
# The result: the temperature of 17.4525 only occurred 11 times out of 1651 data samples.
920 17.4525 1354 17.4525 1396 17.4525 1491 17.4525 1530 17.4525 1768 17.4525 1777 17.4525 1802 17.4525 1900 17.4525 2117 17.4525 2223 17.4525 Name: temperature_2m, dtype: float64
Out[97]:
11
In [98]:
# Find Mode value precise to 0 decimal number
dfmod0 = df1.round(0).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod0 = dfmod0.dropna()
dfmod0
# The result is 19 if we round it to the nearest integer (0 decimal number)
Out[98]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 581 | 2025-03-19 05:00:00+00:00 | 19.0 | 92.0 | 16.0 | 19.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1018.0 | 1000.0 | 100.0 | 0.0 | 0.0 | 0.0 | 12200.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13.0 | 20.0 | 19.0 | 180.0 | 176.0 | 185.0 | 200.0 | 16.0 | 16.0 | 17.0 | 18.0 | 19.0 | 20.0 | 17.0 | 10.0 | Nashville | 2025-03-20 | 05:00:00+00:00 | 2025 | 04 | 20 | 05 | 00 | 00+00 | 00 |
In [99]:
# Find out how many occurrences of 'temperature_2m' rounded to 19C (0 decimal number)
dfmod0a = df1[(df1['temperature_2m'] >= 18.5) & (df1['temperature_2m'] < 19.5)]
dfmod0a = dfmod0a.dropna()
print(dfmod0a['temperature_2m'])
# The result: temperature of 19C occurred 127 times out of 1651 data samples.
# This is far more occurences than if we use the 17.4525C (4 decimal numbers)
596 19.0995
597 19.2995
600 18.9995
666 18.7525
695 19.1025
...
2198 19.0525
2205 19.2025
2208 19.4525
2209 18.8525
2225 19.0025
Name: temperature_2m, Length: 127, dtype: float64
In [100]:
# Find Mode value of Precipitation Probability (Table) precise to 1 decimal number
dfmod1 = df1.round(1).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod1 = dfmod1.dropna()
dfmod1
Out[100]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 581 | 2025-03-19 05:00:00+00:00 | 19.7 | 92.0 | 15.6 | 16.2 | 0.0 | 0.0 | 0.0 | 0.0 | 1017.5 | 998.8 | 100.0 | 0.0 | 0.0 | 0.0 | 12200.0 | 5.2 | 0.3 | 0.3 | 0.3 | 0.3 | 0.3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.2 | 17.3 | 26.5 | 17.7 | 180.0 | 180.0 | 180.0 | 180.0 | 16.2 | 17.5 | 17.4 | 18.0 | 19.5 | 20.2 | 14.2 | 17.6 | Nashville | 2025-03-20 | 05:00:00+00:00 | 2025 | 04 | 20 | 05 | 00 | 00+00 | 00 |
In [101]:
# Find out how many occurrences of 'temperature_2m' rounded to 19.7C (1 decimal number)
dfmod1a = df1[(df1['temperature_2m'] >= 19.65) & (df1['temperature_2m'] < 19.75)] #.where(df1['temperature_2m'] == 19)
dfmod1a = dfmod1a.dropna()
print(dfmod1a['temperature_2m'])
len(dfmod1a)
# The result: the temperature of 19.7C occurred only 18 times out of 1651 data samples.
828 19.6525 909 19.6525 1119 19.6525 1272 19.6525 1299 19.7025 1430 19.6525 1468 19.6525 1469 19.7025 1484 19.7025 1486 19.6525 1512 19.7025 1637 19.7025 1651 19.6525 1791 19.7025 1874 19.7025 1875 19.6525 2048 19.7025 2089 19.6525 Name: temperature_2m, dtype: float64
Out[101]:
18
In [102]:
# Find Mode value of Precipitation Probability (Table) precise to 2 decimal numbers
dfmod2 = df1.round(2).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod2 = dfmod2.dropna()
dfmod2
Out[102]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 581 | 2025-03-19 05:00:00+00:00 | 17.45 | 92.0 | 8.75 | 16.18 | 0.0 | 0.0 | 0.0 | 0.0 | 1017.5 | 998.63 | 100.0 | 0.0 | 0.0 | 0.0 | 12200.0 | 3.1 | 0.28 | 0.24 | 0.28 | 0.28 | 0.29 | 0.0 | 0.0 | 0.0 | 0.0 | 0.15 | 8.31 | 19.11 | 17.65 | 180.0 | 180.0 | 180.0 | 180.0 | 16.16 | 16.46 | 17.37 | 17.97 | 19.47 | 17.07 | 14.17 | 17.64 | Nashville | 2025-03-20 | 05:00:00+00:00 | 2025 | 04 | 20 | 05 | 00 | 00+00 | 00 |
In [103]:
# Find out how many occurrences of 'temperature_2m' rounded to 17.45C (2 decimal numbers)
dfmod2a = df1[(df1['temperature_2m'] >= 17.445) & (df1['temperature_2m'] < 17.455)] #.where(df1['temperature_2m'] == 19)
dfmod2a = dfmod2a.dropna()
print(dfmod2a['temperature_2m'])
len(dfmod2a)
# The result: the temperature of 17.45C occurred only 11 times out of 1651 data samples. This is perfectly the same as if we don't round it.
920 17.4525 1354 17.4525 1396 17.4525 1491 17.4525 1530 17.4525 1768 17.4525 1777 17.4525 1802 17.4525 1900 17.4525 2117 17.4525 2223 17.4525 Name: temperature_2m, dtype: float64
Out[103]:
11
In [104]:
# CONCLUSION: to get the nearest possible Mode value of a Column (in this sample, we take 'temperature_2m'), we can't just take the real values
# but, instead, maybe we have to round it to the nearest integer/whole number.
Standard Deviation and Outliers of Precipitation Probability¶
In [105]:
# Function Definition
def find_outliers_iqr(series):
Q1 = series.quantile(0.25)
print('Q1 = ', Q1)
Q2 = series.quantile(0.50)
print('Q2 = ', Q2)
Q3 = series.quantile(0.75)
print('Q3 = ', Q3)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
print('Lower Bound = ', lower_bound)
upper_bound = Q3 + 1.5 * IQR
print('Upper Bound = ', upper_bound)
outliers = series[(series < lower_bound) | (series > upper_bound)]
return outliers
In [106]:
##### Standard Deviation of 'precipitation_probability'
df1stdevpp = df1[['precipitation_probability']].std() #.sort_values(ascending=False)
df1stdevpp
Out[106]:
precipitation_probability 16.726911 dtype: float64
In [107]:
df1[['precipitation_probability']].mean()
Out[107]:
precipitation_probability 8.011508 dtype: float64
In [108]:
# Distribution Plot of 'precipitation_probability'
df1[['precipitation_probability']].plot.kde()
Out[108]:
<Axes: ylabel='Density'>
In [109]:
##### Outliers of 'precipitation_probability'
# Quantiles of 'precipitation_probability'
# Note: either Lower Bound and/or Upper Bound doesn't have to be exist at all in the Dataset.
find_outliers_iqr(df1[['precipitation_probability']]).dropna().sort_values(by='precipitation_probability', ascending=False)
Q1 = precipitation_probability 0.0 Name: 0.25, dtype: float64 Q2 = precipitation_probability 1.0 Name: 0.5, dtype: float64 Q3 = precipitation_probability 7.0 Name: 0.75, dtype: float64 Lower Bound = precipitation_probability -10.5 dtype: float64 Upper Bound = precipitation_probability 17.5 dtype: float64
Out[109]:
| precipitation_probability | |
|---|---|
| 1015 | 100.0 |
| 1013 | 100.0 |
| 1017 | 100.0 |
| 1014 | 100.0 |
| 1016 | 99.0 |
| ... | ... |
| 847 | 18.0 |
| 1034 | 18.0 |
| 862 | 18.0 |
| 1035 | 18.0 |
| 693 | 18.0 |
239 rows × 1 columns
In [110]:
# Box Plot of 'precipitation_probability'
# Note:
# Top black horizontal line is Upper Bound value
# Bottom black horizontal line is Lower Bound value
# Lower blue line is 25 percentile
# Green line is Median a.k.a 50 percentile
# Upper blue line is 75 percentile
df1[['precipitation_probability']].boxplot()
Out[110]:
<Axes: >
In [111]:
# Draw the Scatter Plot of 'precipitation_probability'
# We can see that the dots are concentrated at below/bottom (below the Upper Bound of 17.5)
import matplotlib.pyplot as plt
df1[['date','precipitation_probability']].plot.scatter(x='date', y='precipitation_probability', s=1, c='green')
Out[111]:
<Axes: xlabel='date', ylabel='precipitation_probability'>
In [112]:
##### Standard Deviation of 'temperature_2m'
df1stdevtemp = df1[['temperature_2m']].std() #.sort_values(ascending=False)
df1stdevtemp
Out[112]:
temperature_2m 6.09753 dtype: float64
In [113]:
df1[['temperature_2m']].mean()
Out[113]:
temperature_2m 17.995103 dtype: float64
In [114]:
df1[['temperature_2m']].max()
Out[114]:
temperature_2m 30.8525 dtype: float64
In [115]:
df1[['temperature_2m']].plot.kde()
Out[115]:
<Axes: ylabel='Density'>
In [116]:
##### Outliers of 'temperature_2m'
# Quantiles of 'temperature_2m'
# Note: either Lower Bound and/or Upper Bound doesn't have to be exist at all in the Dataset.
outlierstemp = find_outliers_iqr(df1[['temperature_2m']])
print(outlierstemp.dropna().sort_values(by='temperature_2m', ascending=False))
Q1 = temperature_2m 14.252501
Name: 0.25, dtype: float64
Q2 = temperature_2m 18.5525
Name: 0.5, dtype: float64
Q3 = temperature_2m 22.2025
Name: 0.75, dtype: float64
Lower Bound = temperature_2m 2.327502
dtype: float64
Upper Bound = temperature_2m 34.127499
dtype: float64
temperature_2m
1087 2.3025
1088 1.8525
629 1.6025
1089 1.4525
1092 1.3525
630 1.0025
1090 1.0025
1091 0.9025
631 -0.0975
637 -0.2975
632 -0.6475
633 -1.2975
634 -1.4975
635 -1.9975
636 -2.7975
In [117]:
# Box Plot of 'temperature_2m'
# Note:
# Top black horizontal line is Upper Bound value
# Bottom black horizontal line is Lower Bound value
# Lower blue line is 25 percentile
# Green line is Median a.k.a 50 percentile
# Upper blue line is 75 percentile
df1[['temperature_2m']].boxplot()
Out[117]:
<Axes: >
In [118]:
# Draw the Scatter Plot of 'temperature_2m'
# We can see that the dots are concentrated at below/bottom (below the Upper Bound of 17.5)
import matplotlib.pyplot as plt
df1[['date','temperature_2m']].plot.scatter(x='date', y='temperature_2m', s=1, c='green')
Out[118]:
<Axes: xlabel='date', ylabel='temperature_2m'>
In [ ]:
In [ ]:
In [119]:
# Boxplot of all Columns
df1[df1.columns[1:9]].boxplot()
Out[119]:
<Axes: >
In [120]:
df1[df1.columns[9:17]].boxplot()
Out[120]:
<Axes: >
In [121]:
df1[df1.columns[17:26]].boxplot()
Out[121]:
<Axes: >
In [122]:
df1[df1.columns[26:35]].boxplot()
Out[122]:
<Axes: >
In [123]:
df1[df1.columns[35:]].boxplot()
Out[123]:
<Axes: >
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: